Skip to content

Commit 3f9e073

Browse files
authored
[VPlan] Move findCommonEdgeMask optimization to simplifyBlends (#156304)
Following up from #150368, this moves folding common edge masks into simplifyBlends. One test in uniform-blend.ll ended up regressing, but on closer inspection the regression came from a weird (x && !x) edge mask, so this PR also includes a simplification that folds that to false.
1 parent 128e7ed commit 3f9e073

File tree

5 files changed

+33
-37
lines changed

5 files changed

+33
-37
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,12 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
24262426
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
24272427
}
24282428

2429+
/// Set mask number \p Idx to \p V.
2430+
void setMask(unsigned Idx, VPValue *V) {
2431+
assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2432+
Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2433+
}
2434+
24292435
void execute(VPTransformState &State) override {
24302436
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
24312437
}

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,6 @@ class VPPredicator {
6767
return EdgeMaskCache[{Src, Dst}] = Mask;
6868
}
6969

70-
/// Given a phi \p PhiR, try to see if its incoming blocks all share a common
71-
/// edge and return its mask.
72-
VPValue *findCommonEdgeMask(const VPPhi *PhiR) const;
73-
7470
public:
7571
/// Returns the precomputed predicate of the edge from \p Src to \p Dst.
7672
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
@@ -232,21 +228,6 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
232228
setEdgeMask(Src, DefaultDst, DefaultMask);
233229
}
234230

235-
VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const {
236-
VPValue *EdgeMask = getEdgeMask(PhiR->getIncomingBlock(0), PhiR->getParent());
237-
VPValue *CommonEdgeMask;
238-
if (!EdgeMask ||
239-
!match(EdgeMask, m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue())))
240-
return nullptr;
241-
for (const VPBasicBlock *InVPBB : drop_begin(PhiR->incoming_blocks())) {
242-
EdgeMask = getEdgeMask(InVPBB, PhiR->getParent());
243-
assert(EdgeMask && "Both null and non-null edge masks found");
244-
if (!match(EdgeMask, m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue())))
245-
return nullptr;
246-
}
247-
return CommonEdgeMask;
248-
}
249-
250231
void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
251232
SmallVector<VPPhi *> Phis;
252233
for (VPRecipeBase &R : VPBB->phis())
@@ -258,7 +239,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
258239
// be duplications since this is a simple recursive scan, but future
259240
// optimizations will clean it up.
260241

261-
VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR);
262242
SmallVector<VPValue *, 2> OperandsWithMask;
263243
for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
264244
OperandsWithMask.push_back(InVPV);
@@ -269,14 +249,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
269249
break;
270250
}
271251

272-
// If all incoming blocks share a common edge, remove it from the mask.
273-
if (CommonEdgeMask) {
274-
VPValue *X;
275-
if (match(EdgeMask,
276-
m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue(X))))
277-
EdgeMask = X;
278-
}
279-
280252
OperandsWithMask.push_back(EdgeMask);
281253
}
282254
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
11081108
return Def->replaceAllUsesWith(
11091109
Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
11101110

1111+
// x && !x -> 0
1112+
if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
1113+
return Def->replaceAllUsesWith(Plan->getOrAddLiveIn(
1114+
ConstantInt::getFalse(VPTypeAnalysis(*Plan).inferScalarType(Def))));
1115+
11111116
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
11121117
return Def->replaceAllUsesWith(X);
11131118

@@ -1318,6 +1323,23 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
13181323
}
13191324
}
13201325

1326+
/// Try to see if all of \p Blend's masks share a common value logically and'ed
1327+
/// and remove it from the masks.
1328+
static void removeCommonBlendMask(VPBlendRecipe *Blend) {
1329+
if (Blend->isNormalized())
1330+
return;
1331+
VPValue *CommonEdgeMask;
1332+
if (!match(Blend->getMask(0),
1333+
m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue())))
1334+
return;
1335+
for (unsigned I = 0; I < Blend->getNumIncomingValues(); I++)
1336+
if (!match(Blend->getMask(I),
1337+
m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue())))
1338+
return;
1339+
for (unsigned I = 0; I < Blend->getNumIncomingValues(); I++)
1340+
Blend->setMask(I, Blend->getMask(I)->getDefiningRecipe()->getOperand(1));
1341+
}
1342+
13211343
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
13221344
/// to make sure the masks are simplified.
13231345
static void simplifyBlends(VPlan &Plan) {
@@ -1328,6 +1350,8 @@ static void simplifyBlends(VPlan &Plan) {
13281350
if (!Blend)
13291351
continue;
13301352

1353+
removeCommonBlendMask(Blend);
1354+
13311355
// Try to remove redundant blend recipes.
13321356
SmallPtrSet<VPValue *, 4> UniqueValues;
13331357
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))

llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,9 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
2626
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2727
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1001, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
2828
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
29-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[TMP25]], i64 0
30-
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
3129
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP25]] to i64
3230
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
3331
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
34-
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 8 x i32> @llvm.stepvector.nxv8i32()
35-
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <vscale x 8 x i32> [[TMP10]], [[BROADCAST_SPLAT8]]
3632
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
3733
; CHECK-NEXT: [[TMP14:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
3834
; CHECK-NEXT: [[TMP9:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP14]], <vscale x 8 x i1> zeroinitializer
@@ -42,9 +38,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
4238
; CHECK-NEXT: [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> zeroinitializer
4339
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], splat (i1 true)
4440
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
45-
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 8 x i1> [[TMP11]], <vscale x 8 x i1> [[TMP21]], <vscale x 8 x i1> zeroinitializer
4641
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
47-
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP15]], i32 0
42+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP21]], i32 0
4843
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]]
4944
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]]
5045
; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr align 2 [[TMP24]], <vscale x 8 x i1> [[TMP22]], i32 [[TMP25]])

llvm/test/Transforms/LoopVectorize/uniform-blend.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ define void @blend_chain_iv(i1 %c) {
130130
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
131131
; CHECK: [[VECTOR_BODY]]:
132132
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
133-
; CHECK-NEXT: [[PREDPHI:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
134-
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI]], <4 x i64> undef
133+
; CHECK-NEXT: [[PREDPHI1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
135134
; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI1]], <4 x i64> undef
136135
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 0
137136
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]]
@@ -146,7 +145,7 @@ define void @blend_chain_iv(i1 %c) {
146145
; CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2
147146
; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2
148147
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
149-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI]], splat (i64 4)
148+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI1]], splat (i64 4)
150149
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
151150
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
152151
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)