From 71e24f02d18b92434c450372213d80a9d4ae4c16 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 26 Aug 2025 18:18:50 +0800
Subject: [PATCH 1/2] [VPlan] Rewrite optimizeMaskToEVL in terms of pattern
 matching. NFC

Stacked on #155383

Currently in optimizeMaskToEVL we convert every widened load, store or
reduction to a VP predicated recipe with EVL, regardless of whether or not
it uses the header mask. As a result, we currently have to be careful when
working on other parts of VPlan to make sure that the EVL transform doesn't
break or transform something incorrectly, because it's not a
semantics-preserving transform. Forgetting to do so has caused miscompiles
before, like the case that was fixed in #113667.

This PR rewrites it to work in terms of pattern matching, so it now only
converts a recipe to a VP predicated recipe if it uses the header mask.

It also splits out the load/store transforms into separate patterns for
reversed and non-reversed accesses, which should make #146525 easier to
implement and reason about.

After this the transform should be a true optimisation and not change any
semantics, so it shouldn't miscompile things if other parts of VPlan
change.

This fixes #152541, and allows us to move addExplicitVectorLength into
tryToBuildVPlanWithVPRecipes in #153144.
---
 .../Transforms/Vectorize/VPlanPatternMatch.h  |  76 +++++++++-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 142 +++++++++--
 .../RISCV/blocks-with-dead-instructions.ll    |   2 +-
 ...ruction-or-drop-poison-generating-flags.ll |   2 +-
 4 files changed, 152 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 109156c1469c5..b57233e3d82bd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -255,7 +255,8 @@ struct Recipe_match {
     if constexpr (std::is_same<RecipeTy, VPScalarIVStepsRecipe>::value ||
                   std::is_same<RecipeTy, VPCanonicalIVPHIRecipe>::value ||
                   std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
-                  std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
+                  std::is_same<RecipeTy, VPWidenGEPRecipe>::value ||
+                  std::is_same<RecipeTy, VPVectorEndPointerRecipe>::value)
       return DefR;
     else
       return DefR && DefR->getOpcode() == Opcode;
@@ -587,6 +588,79 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
   return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
 }
 
+template <typename Addr_t, typename Mask_t, bool Reverse> struct Load_match {
+  Addr_t Addr;
+  Mask_t Mask;
+
+  Load_match(Addr_t Addr, Mask_t Mask) : Addr(Addr), Mask(Mask) {}
+
+  template <typename OpTy> bool match(const OpTy *V) const {
+    auto *Load = dyn_cast<VPWidenLoadRecipe>(V);
+    if (!Load || Load->isReverse() != Reverse || !Addr.match(Load->getAddr()) ||
+        !Load->isMasked() || !Mask.match(Load->getMask()))
+      return false;
+    return true;
+  }
+};
+
+/// Match a non-reversed masked load.
+template <typename Addr_t, typename Mask_t>
+inline Load_match<Addr_t, Mask_t, false> m_Load(const Addr_t &Addr,
+                                                const Mask_t &Mask) {
+  return Load_match<Addr_t, Mask_t, false>(Addr, Mask);
+}
+
+/// Match a reversed masked load.
+template <typename Addr_t, typename Mask_t>
+inline Load_match<Addr_t, Mask_t, true> m_ReverseLoad(const Addr_t &Addr,
+                                                      const Mask_t &Mask) {
+  return Load_match<Addr_t, Mask_t, true>(Addr, Mask);
+}
+
+template <typename Addr_t, typename Val_t, typename Mask_t, bool Reverse>
+struct Store_match {
+  Addr_t Addr;
+  Val_t Val;
+  Mask_t Mask;
+
+  Store_match(Addr_t Addr, Val_t Val, Mask_t Mask)
+      : Addr(Addr), Val(Val), Mask(Mask) {}
+
+  template <typename OpTy> bool match(const OpTy *V) const {
+    auto *Store = dyn_cast<VPWidenStoreRecipe>(V);
+    if (!Store || Store->isReverse() != Reverse ||
+        !Addr.match(Store->getAddr()) || !Val.match(Store->getStoredValue()) ||
+        !Store->isMasked() || !Mask.match(Store->getMask()))
+      return false;
+    return true;
+  }
+};
+
+/// Match a non-reversed masked store.
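+///
+/// A usage sketch (illustrative only; `R`, `Ptr` and `StoredVal` are
+/// hypothetical locals, not part of this patch):
+///   VPValue *Ptr, *StoredVal, *Mask;
+///   if (match(&R, m_Store(m_VPValue(Ptr), m_VPValue(StoredVal),
+///                         m_VPValue(Mask))))
+///     ; // R is a non-reversed, masked VPWidenStoreRecipe.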
+template <typename Addr_t, typename Val_t, typename Mask_t>
+inline Store_match<Addr_t, Val_t, Mask_t, false>
+m_Store(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) {
+  return Store_match<Addr_t, Val_t, Mask_t, false>(Addr, Val, Mask);
+}
+
+/// Match a reversed masked store.
+template <typename Addr_t, typename Val_t, typename Mask_t>
+inline Store_match<Addr_t, Val_t, Mask_t, true>
+m_ReverseStore(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) {
+  return Store_match<Addr_t, Val_t, Mask_t, true>(Addr, Val, Mask);
+}
+
+template <typename Op0_t, typename Op1_t>
+using VectorEndPointerRecipe_match =
+    Recipe_match<std::tuple<Op0_t, Op1_t>, 0,
+                 /*Commutative*/ false, VPVectorEndPointerRecipe>;
+
+template <typename Op0_t, typename Op1_t>
+VectorEndPointerRecipe_match<Op0_t, Op1_t> m_VecEndPtr(const Op0_t &Op0,
+                                                       const Op1_t &Op1) {
+  return VectorEndPointerRecipe_match<Op0_t, Op1_t>(Op0, Op1);
+}
+
 /// Match a call argument at a given argument index.
 template <typename Opnd_t> struct Argument_match {
   /// Call argument index to match.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a942d52cbca94..e82be09dde96f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2272,90 +2272,98 @@ void VPlanTransforms::addActiveLaneMask(
   HeaderMask->eraseFromParent();
 }
 
+template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
+  Op0_t In;
+  Op1_t &Out;
+
+  RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (m_Specific(In).match(V)) {
+      Out = nullptr;
+      return true;
+    }
+    if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V))
+      return true;
+    return false;
+  }
+};
+
+/// Match a specific mask \p in, or a combination of it (logical-and in, out).
+/// Returns the remaining part \p out if so, or nullptr otherwise.
+template <typename Op0_t, typename Op1_t>
+static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
+                                                          Op1_t &Out) {
+  return RemoveMask_match<Op0_t, Op1_t>(In, Out);
+}
+
 /// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
 /// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
 /// recipe could be created.
 /// \p HeaderMask Header Mask.
 /// \p CurRecipe Recipe to be transform.
 /// \p TypeInfo VPlan-based type analysis.
-/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
 /// \p EVL The explicit vector length parameter of vector-predication
 /// intrinsics.
 static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
                                        VPRecipeBase &CurRecipe,
-                                       VPTypeAnalysis &TypeInfo,
-                                       VPValue &AllOneMask, VPValue &EVL) {
-  // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
-  // header mask.
-  auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
-    assert(OrigMask && "Unmasked recipe when folding tail");
-    // HeaderMask will be handled using EVL.
-    VPValue *Mask;
-    if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
-      return Mask;
-    return HeaderMask == OrigMask ? nullptr : OrigMask;
-  };
+                                       VPTypeAnalysis &TypeInfo, VPValue &EVL) {
+  VPlan *Plan = CurRecipe.getParent()->getPlan();
+  VPValue *Addr, *Mask, *EndPtr;
+
+  // Adjust any end pointers so that they point to the end of EVL lanes not VF.
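+  // (A reversed access computes its end pointer from VF; with EVL
+  // tail-folding only the first EVL lanes are active, so the cloned recipe
+  // below gets its VF operand, operand 1, replaced with EVL.)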
-  auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
-    auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
-    if (!EndPtr)
-      return Addr;
-    assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
-           "VPVectorEndPointerRecipe with non-VF VF operand?");
-    assert(
-        all_of(EndPtr->users(),
-               [](VPUser *U) {
-                 return cast<VPWidenMemoryRecipe>(U)->isReverse();
-               }) &&
-        "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
-    VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();
-    EVLAddr->insertBefore(&CurRecipe);
-    EVLAddr->setOperand(1, &EVL);
-    return EVLAddr;
+  auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
+    auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
+    EVLEndPtr->insertBefore(&CurRecipe);
+    EVLEndPtr->setOperand(1, &EVL);
+    return EVLEndPtr;
   };
 
-  return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
-      .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
-        VPValue *NewMask = GetNewMask(L->getMask());
-        VPValue *NewAddr = GetNewAddr(L->getAddr());
-        return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
-      })
-      .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
-        VPValue *NewMask = GetNewMask(S->getMask());
-        VPValue *NewAddr = GetNewAddr(S->getAddr());
-        return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask);
-      })
-      .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
-        VPValue *NewMask = GetNewMask(IR->getMask());
-        return new VPInterleaveEVLRecipe(*IR, EVL, NewMask);
-      })
-      .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
-        VPValue *NewMask = GetNewMask(Red->getCondOp());
-        return new VPReductionEVLRecipe(*Red, EVL, NewMask);
-      })
-      .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
-        VPValue *LHS, *RHS;
-        // Transform select with a header mask condition
-        //   select(header_mask, LHS, RHS)
-        // into vector predication merge.
-        //   vp.merge(all-true, LHS, RHS, EVL)
-        if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
-                                 m_VPValue(RHS))))
-          return nullptr;
-        // Use all true as the condition because this transformation is
-        // limited to selects whose condition is a header mask.
-        return new VPWidenIntrinsicRecipe(
-            Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
-            TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
-      })
-      .Default([&](VPRecipeBase *R) { return nullptr; });
+  if (match(&CurRecipe,
+            m_Load(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))))
+    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
+                                    EVL, Mask);
+
+  if (match(&CurRecipe,
+            m_ReverseLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))))
+    return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
+                                    AdjustEndPtr(EndPtr), EVL, Mask);
+
+  if (match(&CurRecipe, m_Store(m_VPValue(Addr), m_VPValue(),
+                                m_RemoveMask(HeaderMask, Mask))))
+    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
+                                     EVL, Mask);
+
+  if (match(&CurRecipe, m_ReverseStore(m_VPValue(EndPtr), m_VPValue(),
+                                       m_RemoveMask(HeaderMask, Mask))) &&
+      match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))))
+    return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+                                     AdjustEndPtr(EndPtr), EVL, Mask);
+
+  if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
+    if (Rdx->isConditional() &&
+        match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask)))
+      return new VPReductionEVLRecipe(*Rdx, EVL, Mask);
+
+  if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
+    if (Interleave->getMask() &&
+        match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask)))
+      return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask);
+
+  VPValue *LHS, *RHS;
+  if (match(&CurRecipe,
+            m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS))))
+    return new VPWidenIntrinsicRecipe(
+        Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},
+        TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc());
+
+  return nullptr;
 }
 
 /// Replace recipes with their EVL variants.
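+///
+/// For example, in rough VPlan-printer notation (illustrative, not verbatim
+/// output), a widened load masked by the header mask
+///   WIDEN ir<%l> = load vp<%addr>, vp<%header_mask>
+/// becomes
+///   WIDEN ir<%l> = vp.load vp<%addr>, vp<%evl>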
 static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   VPTypeAnalysis TypeInfo(Plan);
-  VPValue *AllOneMask = Plan.getTrue();
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
 
@@ -2414,7 +2422,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
         ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
     VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
         Intrinsic::experimental_vp_splice,
-        {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+        {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
        TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
     VPSplice->insertBefore(&R);
     R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
@@ -2448,7 +2456,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   for (VPUser *U : collectUsersRecursively(EVLMask)) {
     auto *CurRecipe = cast<VPRecipeBase>(U);
     VPRecipeBase *EVLRecipe =
-        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
+        optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
     if (!EVLRecipe)
       continue;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
index 631328a9a0964..b370d70af7544 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -454,7 +454,7 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
 ; CHECK-NEXT:    [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = select <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> [[BROADCAST_SPLAT]], <vscale x 8 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = or <vscale x 8 x i1> [[TMP22]], [[TMP23]]
-; CHECK-NEXT:    call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> [[TMP24]], i32 [[TMP27]])
+; CHECK-NEXT:    call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> [[TMP24]])
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP27]] to i64
 ; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index db6185087bac5..80c707931e651 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -48,7 +48,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP21]], i32 0
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]]
-; CHECK-NEXT:    call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr align 2 [[TMP24]], <vscale x 8 x i1> [[TMP22]], i32 [[TMP25]])
+; CHECK-NEXT:    call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP24]], i32 2, <vscale x 8 x i1> [[TMP22]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP25]] to i64
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[TMP26]], [[INDEX]]
 ; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]]

From 3e19e28f3a5762e70f3d57fbbe3f6f350783d40a Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 3 Sep 2025 09:50:05 +0800
Subject: [PATCH 2/2] Fix comment capitalization

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 074d41522ef91..382cd1c4f5331 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2402,8 +2402,8 @@ template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
   }
 };
 
-/// Match a specific mask \p in, or a combination of it (logical-and in, out).
-/// Returns the remaining part \p out if so, or nullptr otherwise.
+/// Match a specific mask \p In, or a combination of it (logical-and In, Out).
+/// Returns the remaining part \p Out if so, or nullptr otherwise.
 template <typename Op0_t, typename Op1_t>
 static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
                                                           Op1_t &Out) {