diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 17b54f2ef9c05..fc9bb806895c6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1068,7 +1068,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X // && (Y || Z) and (X || !X) into true. This requires queuing newly created // recipes to be visited during simplification. - VPValue *X, *Y; + VPValue *X, *Y, *Z; if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) { @@ -1092,6 +1092,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } + // (x && y) || (x && z) -> x && (y || z) + VPBuilder Builder(Def); + if (match(Def, m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)), + m_LogicalAnd(m_Deferred(X), m_VPValue(Z)))) && + // Simplify only if one of the operands has one use to avoid creating an + // extra recipe. + (!Def->getOperand(0)->hasMoreThanOneUniqueUser() || + !Def->getOperand(1)->hasMoreThanOneUniqueUser())) + return Def->replaceAllUsesWith( + Builder.createLogicalAnd(X, Builder.createOr(Y, Z))); + if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) return Def->replaceAllUsesWith(X); @@ -1158,7 +1169,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { m_VPValue(X), m_SpecificInt(1)))) { Type *WideStepTy = TypeInfo.inferScalarType(Def); if (TypeInfo.inferScalarType(X) != WideStepTy) - X = VPBuilder(Def).createWidenCast(Instruction::Trunc, X, WideStepTy); + X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy); Def->replaceAllUsesWith(X); return; } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index a94187383a014..6b7d0a994a29d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -315,19 +315,12 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 37, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP8]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 9, [[TMP5]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP12:%.*]] = icmp ult [[TMP11]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: [[TMP13:%.*]] = select [[TMP12]], splat (i1 true), zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = select [[TMP12]], zeroinitializer, zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = or [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP19]], align 4 [[TMP16]], [[TMP15]], i32 [[TMP8]]) +; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP19]], align 4 [[TMP16]], splat (i1 true), i32 [[TMP8]]) ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]]