diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index dbbead3bb782e..03e7bef817cf7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1107,13 +1107,29 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith(A); // Try to fold Not into compares by adjusting the predicate in-place. - if (isa(A) && A->getNumUsers() == 1) { - auto *WideCmp = cast(A); - if (WideCmp->getOpcode() == Instruction::ICmp || - WideCmp->getOpcode() == Instruction::FCmp) { + if (auto *WideCmp = dyn_cast(A)) { + if ((WideCmp->getOpcode() == Instruction::ICmp || + WideCmp->getOpcode() == Instruction::FCmp) && + all_of(WideCmp->users(), [&WideCmp](VPUser *U) { + return match(U, m_CombineOr(m_Not(m_Specific(WideCmp)), + m_Select(m_Specific(WideCmp), + m_VPValue(), m_VPValue()))); + })) { WideCmp->setPredicate( CmpInst::getInversePredicate(WideCmp->getPredicate())); - Def->replaceAllUsesWith(WideCmp); + for (VPUser *U : to_vector(WideCmp->users())) { + auto *R = cast(U); + if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X), + m_VPValue(Y)))) { + // select (cmp pred), x, y -> select (cmp inv_pred), y, x + R->setOperand(1, Y); + R->setOperand(2, X); + } else { + // not (cmp pred) -> cmp inv_pred + assert(match(R, m_Not(m_Specific(WideCmp))) && "Unexpected user"); + R->replaceAllUsesWith(WideCmp); + } + } // If WideCmp doesn't have a debug location, use the one from the // negation, to preserve the location. if (!WideCmp->getDebugLoc() && R.getDebugLoc()) diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 22667ae324d03..c04dc7b29a6a9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -29,7 +29,7 @@ define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0:![0-9]+]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -71,7 +71,7 @@ define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %ou ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -113,7 +113,7 @@ define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -156,7 +156,7 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -198,7 +198,7 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_GATHER]], <4 x float> zeroinitializer ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -243,7 +243,7 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[PTRS]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -302,7 +302,7 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3 ; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> , <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] @@ -619,8 +619,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -660,8 +659,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP30]], <4 x i8> zeroinitializer ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[DST]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -706,8 +705,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -747,9 +745,9 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison +; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP26]], <4 x i8> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> poison, <4 x i64> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8 ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[DST]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index 2c97863c182b9..0fdcf89deb902 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -17,8 +17,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -34,8 +33,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> zeroinitializer, <2 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index d24e7e871239a..0316e8e39a6a4 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -20,8 +20,7 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP5]], i64 4), "dereferenceable"(ptr [[TMP5]], i64 4) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -39,8 +38,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP14]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -190,8 +189,7 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP5]], i64 4), "dereferenceable"(ptr [[TMP5]], i64 2) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -209,8 +207,8 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP14]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -287,8 +285,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP3]], i64 4) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -307,7 +304,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -384,8 +381,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP3]], i64 4) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -404,7 +400,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -481,8 +477,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP3]], i64 4) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -501,7 +496,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -573,8 +568,7 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] @@ -593,8 +587,8 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP12]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -667,8 +661,7 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -686,8 +679,8 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP12]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ] @@ -773,8 +766,7 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP5]], i64 4), "dereferenceable"(ptr [[TMP5]], i64 4) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -792,8 +784,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP14]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -940,8 +932,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -962,7 +953,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1179,8 +1170,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1201,7 +1191,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1271,8 +1261,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1293,7 +1282,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1365,8 +1354,7 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1382,8 +1370,8 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP14]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1455,8 +1443,7 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1473,7 +1460,7 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP12]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP12]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll index 9852f538c6f74..8417e22fe0d8a 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll @@ -180,8 +180,7 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP15:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -201,8 +200,8 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP20]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -278,8 +277,7 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP15:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -299,8 +297,8 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP20]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -458,8 +456,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_al ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -480,7 +477,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_al ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -566,8 +563,7 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -587,8 +583,8 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP14]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP20]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -678,8 +674,7 @@ define void @deref_assumption_loop_access_start_variable_unknown_range(i8 %v, pt ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -700,7 +695,7 @@ define void @deref_assumption_loop_access_start_variable_unknown_range(i8 %v, pt ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP16]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 65330aabf3c9a..ad6a1ec684e32 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1192,14 +1192,13 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x float> [[TMP9]], <4 x float> [[TMP8]] -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[PREDPHI]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1315,14 +1314,13 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI1:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]] -; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP4]], <4 x float> [[PREDPHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 343facb2ef698..cbf9cc11d9009 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -4978,8 +4978,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; CHECK: pred.urem.if: @@ -4998,7 +4997,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: br label [[PRED_UREM_CONTINUE2]] ; CHECK: pred.urem.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP13]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -5172,10 +5171,8 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ne <2 x i16> [[STEP_ADD]], zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.urem.if: @@ -5212,8 +5209,8 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE7]] ; UNROLL-NO-IC: pred.urem.continue7: ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE5]] ], [ [[TMP24]], [[PRED_UREM_IF6]] ] -; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]] -; UNROLL-NO-IC-NEXT: [[PREDPHI8:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]] +; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i16> [[TMP15]], <2 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[PREDPHI8:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> [[TMP25]], <2 x i16> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI8]] to <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index fbe57c81053fa..c822ded111c62 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -97,55 +97,54 @@ define void @accesses_to_struct_may_not_be_dereferenceable_due_to_loop_bound(ptr ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP25]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP27]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP26]] +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP25]], <4 x i32> [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -216,56 +215,55 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP7]], i64 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i64> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i64> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP13]], i64 [[TMP17]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP23]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP23]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i64> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP25]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP27:%.*]] = trunc <4 x i64> [[TMP26]] to <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP27]] +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i64> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26:%.*]] = trunc <4 x i64> [[TMP25]] to <4 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP27]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP26]], <4 x i32> [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index 02f324281bf4d..1eae53559d179 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -183,12 +183,11 @@ define i32 @test3(i32 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2) -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -196,12 +195,12 @@ define i32 @test3(i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index c5dc81b28db2b..9824879546c5a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -19,8 +19,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @v_38, align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]] ; CHECK: pred.srem.if: @@ -37,7 +36,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]] ; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> splat (i16 5786), <2 x i16> [[TMP10]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[TMP10]], <2 x i16> splat (i16 5786) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i16 [[TMP11]], ptr @v_39, align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2