diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 875d501f2a3dc..c588453091fcc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1119,6 +1119,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } + // Reassociate (x && y) && z -> x && (y && z) if x has multiple users. With + // tail folding it is likely that x is a header mask and can be simplified + // further. + if (match(Def, m_LogicalAnd(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), + m_VPValue(Z))) && + X->hasMoreThanOneUniqueUser()) + return Def->replaceAllUsesWith( + Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z))); + if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return Def->replaceAllUsesWith(A); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 3660b937da75d..d7c911389e9c2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -198,7 +198,9 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: [[BROADCAST_SPLAT57:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT56]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C_4]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT57]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE55:.*]] ] @@ -231,8 +233,7 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] ; CHECK: [[PRED_STORE_CONTINUE47]]: -; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[BROADCAST_SPLAT41]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]] ; CHECK-NEXT: [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1) ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP23]], i32 0 @@ -249,7 +250,7 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] ; CHECK: [[PRED_STORE_CONTINUE51]]: -; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[TMP33]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP37:%.*]] = or <2 x i1> [[TMP23]], [[TMP35]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 631328a9a0964..5a99f15b9f585 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -436,23 +436,17 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP27]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP27]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP12]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv8i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP14]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv8i16.nxv8p0( align 2 [[TMP20]], splat (i1 true), i32 [[TMP27]]) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[WIDE_MASKED_GATHER]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = select [[TMP15]], [[TMP17]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP18]], [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = select [[TMP17]], [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = xor [[TMP17]], splat (i1 true) -; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP15]], [[TMP28]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP18]], [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP14]], [[TMP28]] +; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP17]], [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index db6185087bac5..8b212f4ef9706 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -34,18 +34,17 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv8i32() ; CHECK-NEXT: [[TMP11:%.*]] = icmp ult [[TMP10]], [[BROADCAST_SPLAT8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP28:%.*]] = select [[TMP11]], [[TMP13]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP28]], [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = select [[TMP13]], [[TMP14]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = xor [[TMP13]], splat (i1 true) -; CHECK-NEXT: [[TMP29:%.*]] = select [[TMP11]], [[TMP16]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP15]], [[TMP29]] +; CHECK-NEXT: [[TMP17:%.*]] = or [[TMP9]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP19:%.*]] = select [[TMP17]], [[TMP18]], zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = xor [[TMP14]], splat (i1 true) -; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP28]], [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = select [[TMP13]], [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select [[TMP11]], [[TMP21]], zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = or [[TMP19]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP21]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( zeroinitializer, ptr align 2 [[TMP24]], [[TMP22]], i32 [[TMP25]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index db54ca61f715b..54d738388ea73 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -65,35 +65,35 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 @@ -107,11 +107,11 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -158,35 +158,35 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 ; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 @@ -200,11 +200,11 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] ; CHECK: then.1: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 ; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false ; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] ; CHECK: then.2: ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] @@ -256,9 +256,9 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> , <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -346,10 +346,10 @@ define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> , <4 x i1> [[TMP1]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll index d695de6491baa..e25be6f867862 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll @@ -461,8 +461,8 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer ; COST-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) ; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) -; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer -; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer +; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer +; COST-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]]) ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]]) @@ -540,10 +540,10 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e ; FORCED-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) ; FORCED-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) ; FORCED-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], splat (i1 true) -; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer -; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer -; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer -; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer +; FORCED-NEXT: [[TMP22:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP21]]) ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]]) ; FORCED-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer