Skip to content

Commit c9faedd

Browse files
authored
[VPlan] Fold common edges away in convertPhisToBlends (#150368)
If a phi is widened with tail folding, all of its predecessors will have a mask of the form %x = logical-and %active-lane-mask, %foo %y = logical-and %active-lane-mask, %bar %z = logical-and %active-lane-mask, %baz ... We can remove the common %active-lane-mask from all of these edge masks, which in turn allows us to simplify a lot of VPBlendRecipes. In particular, it allows the header mask to be removed in selects with EVL tail folding, improving RISC-V codegen on SPEC CPU 2017 for 525.x264_r, and supersedes #147243. This also allows us to remove VPBlendRecipe and directly emit VPInstruction::Select in another patch.
1 parent 100180f commit c9faedd

21 files changed

+98
-134
lines changed

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
#include "VPRecipeBuilder.h"
1515
#include "VPlan.h"
1616
#include "VPlanCFG.h"
17+
#include "VPlanPatternMatch.h"
1718
#include "VPlanTransforms.h"
1819
#include "VPlanUtils.h"
1920
#include "llvm/ADT/PostOrderIterator.h"
2021

2122
using namespace llvm;
23+
using namespace VPlanPatternMatch;
2224

2325
namespace {
2426
class VPPredicator {
@@ -65,6 +67,10 @@ class VPPredicator {
6567
return EdgeMaskCache[{Src, Dst}] = Mask;
6668
}
6769

70+
/// Given a phi \p PhiR, try to see if its incoming blocks all share a common
71+
/// edge and return its mask.
72+
VPValue *findCommonEdgeMask(const VPPhi *PhiR) const;
73+
6874
public:
6975
/// Returns the precomputed predicate of the edge from \p Src to \p Dst.
7076
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
@@ -226,6 +232,21 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
226232
setEdgeMask(Src, DefaultDst, DefaultMask);
227233
}
228234

235+
VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const {
236+
VPValue *EdgeMask = getEdgeMask(PhiR->getIncomingBlock(0), PhiR->getParent());
237+
VPValue *CommonEdgeMask;
238+
if (!EdgeMask ||
239+
!match(EdgeMask, m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue())))
240+
return nullptr;
241+
for (const VPBasicBlock *InVPBB : drop_begin(PhiR->incoming_blocks())) {
242+
EdgeMask = getEdgeMask(InVPBB, PhiR->getParent());
243+
assert(EdgeMask && "Both null and non-null edge masks found");
244+
if (!match(EdgeMask, m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue())))
245+
return nullptr;
246+
}
247+
return CommonEdgeMask;
248+
}
249+
229250
void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
230251
SmallVector<VPPhi *> Phis;
231252
for (VPRecipeBase &R : VPBB->phis())
@@ -237,6 +258,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
237258
// be duplications since this is a simple recursive scan, but future
238259
// optimizations will clean it up.
239260

261+
VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR);
240262
SmallVector<VPValue *, 2> OperandsWithMask;
241263
for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
242264
OperandsWithMask.push_back(InVPV);
@@ -246,6 +268,15 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
246268
"Distinct incoming values with one having a full mask");
247269
break;
248270
}
271+
272+
// If all incoming blocks share a common edge, remove it from the mask.
273+
if (CommonEdgeMask) {
274+
VPValue *X;
275+
if (match(EdgeMask,
276+
m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue(X))))
277+
EdgeMask = X;
278+
}
279+
249280
OperandsWithMask.push_back(EdgeMask);
250281
}
251282
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
13391339
; PRED-NEXT: [[TMP19:%.*]] = udiv <vscale x 16 x i8> [[TMP17]], [[TMP18]]
13401340
; PRED-NEXT: [[TMP20:%.*]] = icmp ugt <vscale x 16 x i8> [[TMP19]], splat (i8 1)
13411341
; PRED-NEXT: [[TMP21:%.*]] = select <vscale x 16 x i1> [[TMP20]], <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> splat (i32 255)
1342-
; PRED-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP16]], <vscale x 16 x i32> [[TMP21]], <vscale x 16 x i32> zeroinitializer
1342+
; PRED-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i32> [[TMP21]], <vscale x 16 x i32> zeroinitializer
13431343
; PRED-NEXT: [[TMP22:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
13441344
; PRED-NEXT: [[TMP23:%.*]] = sub <vscale x 16 x i32> [[PREDPHI]], [[TMP22]]
13451345
; PRED-NEXT: [[TMP24:%.*]] = sitofp <vscale x 16 x i32> [[TMP23]] to <vscale x 16 x float>

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
6868
; TFCOMMON-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3]]
6969
; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
7070
; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
71-
; TFCOMMON-NEXT: [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
72-
; TFCOMMON-NEXT: [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
73-
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
71+
; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
72+
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
7473
; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
7574
; TFCOMMON-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
7675
; TFCOMMON: pred.store.if:
@@ -115,12 +114,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
115114
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
116115
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
117116
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
118-
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
119-
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = fcmp ule <2 x double> [[TMP12]], zeroinitializer
120-
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
121-
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK2]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer
122-
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
123-
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP18]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
117+
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
118+
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
119+
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP13]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
120+
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
124121
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0
125122
; TFA_INTERLEAVE-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
126123
; TFA_INTERLEAVE: pred.store.if:

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
191191
; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], splat (i64 50)
192192
; TFCOMMON-NEXT: [[TMP7:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer
193193
; TFCOMMON-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP7]])
194-
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> zeroinitializer
194+
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> zeroinitializer
195195
; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
196196
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
197197
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -228,8 +228,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
228228
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
229229
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP13]])
230230
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP14]])
231-
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64> zeroinitializer
232-
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP14]], <vscale x 2 x i64> [[TMP16]], <vscale x 2 x i64> zeroinitializer
231+
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64> zeroinitializer
232+
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> [[TMP16]], <vscale x 2 x i64> zeroinitializer
233233
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
234234
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
235235
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
@@ -348,7 +348,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
348348
; TFCOMMON-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]])
349349
; TFCOMMON-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer
350350
; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP10]])
351-
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP9]]
351+
; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP9]]
352352
; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
353353
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
354354
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -391,8 +391,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
391391
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
392392
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP19]])
393393
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP20]])
394-
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i64> [[TMP21]], <vscale x 2 x i64> [[TMP17]]
395-
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i64> [[TMP22]], <vscale x 2 x i64> [[TMP18]]
394+
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[TMP21]], <vscale x 2 x i64> [[TMP17]]
395+
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> [[TMP22]], <vscale x 2 x i64> [[TMP18]]
396396
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
397397
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
398398
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1
@@ -973,25 +973,23 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
973973
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
974974
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
975975
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
976-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP13:.*]] ]
977-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP13]] ]
978-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP13]] ]
976+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[TMP11:.*]] ]
977+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP11]] ]
978+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP11]] ]
979979
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
980980
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
981981
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
982-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ule double [[TMP5]], 0.000000e+00
983-
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ule double [[TMP6]], 0.000000e+00
984-
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP7]], i1 false
985-
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP8]], i1 false
986-
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP11]], double 1.000000e+00, double 0.000000e+00
987-
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP12]], double 1.000000e+00, double 0.000000e+00
982+
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
983+
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00
984+
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP7]], double 0.000000e+00, double 1.000000e+00
985+
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP8]], double 0.000000e+00, double 1.000000e+00
988986
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[PREDPHI]]
989987
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = or i1 [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK2]]
990-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[BB12:.*]], label %[[TMP13]]
991-
; TFA_INTERLEAVE: [[BB12]]:
988+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[BB10:.*]], label %[[TMP11]]
989+
; TFA_INTERLEAVE: [[BB10]]:
992990
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
993-
; TFA_INTERLEAVE-NEXT: br label %[[TMP13]]
994-
; TFA_INTERLEAVE: [[TMP13]]:
991+
; TFA_INTERLEAVE-NEXT: br label %[[TMP11]]
992+
; TFA_INTERLEAVE: [[TMP11]]:
995993
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
996994
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1
997995
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no
10741074
; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> zeroinitializer
10751075
; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
10761076
; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> poison)
1077-
; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> splat (float 3.000000e+00)
1077+
; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD1]], <vscale x 4 x float> splat (float 3.000000e+00)
10781078
; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[PREDPHI]], <vscale x 4 x float> splat (float -0.000000e+00)
10791079
; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP11]])
10801080
; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 {
245245
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]]
246246
; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP14]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
247247
; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD1]]
248-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[TMP17]], <vscale x 4 x i32> [[VEC_PHI]]
248+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> [[TMP17]], <vscale x 4 x i32> [[VEC_PHI]]
249249
; CHECK-NEXT: [[TMP20]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]]
250250
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]]
251251
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
335335
; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], zeroinitializer
336336
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
337337
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> poison)
338-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32> zeroinitializer
338+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x i32> zeroinitializer
339339
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX1]]
340340
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI]], ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
341341
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]

llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ define void @cond_uniform_load(ptr noalias nocapture %dst, ptr nocapture readonl
7878
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[COND_LOAD]], zeroinitializer
7979
; CHECK-NEXT: [[MASK:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
8080
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[SRC_SPLAT]], i32 4, <4 x i1> [[MASK]], <4 x i32> poison)
81-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[MASK]], <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer
81+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer
8282
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX6]]
8383
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[PREDPHI]], ptr [[TMP7]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
8484
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX6]], 4

0 commit comments

Comments
 (0)