Skip to content

Commit a3c0340

Browse files
committed
Precisely track blend selects to see if they have been simplified, add test case
1 parent 78c3a08 commit a3c0340

File tree

2 files changed

+93
-3
lines changed

2 files changed

+93
-3
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7020,6 +7020,7 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
70207020
};
70217021

70227022
DenseSet<Instruction *> SeenInstrs;
7023+
SmallDenseMap<PHINode *, unsigned> BlendPhis;
70237024
auto Iter = vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry());
70247025
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
70257026
for (VPRecipeBase &R : *VPBB) {
@@ -7048,11 +7049,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
70487049
return true;
70497050

70507051
// VPBlendRecipes are converted to selects and may have been simplified.
7052+
// Keep track of how many selects each phi has been converted to.
70517053
using namespace VPlanPatternMatch;
70527054
if (match(&R, m_VPInstruction<Instruction::Select>(
7053-
m_VPValue(), m_VPValue(), m_VPValue())) &&
7054-
isa_and_nonnull<PHINode>(R.getVPSingleValue()->getUnderlyingValue()))
7055-
return true;
7055+
m_VPValue(), m_VPValue(), m_VPValue())))
7056+
if (auto *Phi = dyn_cast_if_present<PHINode>(
7057+
R.getVPSingleValue()->getUnderlyingValue()))
7058+
BlendPhis[Phi]++;
70567059

70577060
/// If a VPlan transform folded a recipe to one producing a single-scalar,
70587061
/// but the original instruction wasn't uniform-after-vectorization in the
@@ -7077,6 +7080,12 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
70777080
}
70787081
}
70797082

7083+
// If a phi has been simplified then it will have fewer selects than the number
7084+
// of incoming values.
7085+
for (auto [Phi, NumSelects] : BlendPhis)
7086+
if (NumSelects != Phi->getNumIncomingValues() - 1)
7087+
return true;
7088+
70807089
// Return true if the loop contains any instructions that are not also part of
70817090
// the VPlan or are skipped for VPlan-based cost computations. This indicates
70827091
// that the VPlan contains extra simplifications.
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt < %s -S -p loop-vectorize | FileCheck %s
3+
4+
; VPlanTransforms::simplifyRecipes will simplify some of the selects stemming from
5+
; the blend recipe, which will cause a difference between the legacy and VPlan
6+
; based cost models. Make sure we account for this simplification. This is
7+
; extracted from sqlite3 in llvm-test-suite.
8+
9+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
10+
target triple = "riscv64-unknown-linux-gnu"
11+
12+
define i64 @html_encode() #0 {
13+
; CHECK-LABEL: define i64 @html_encode(
14+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
15+
; CHECK-NEXT: [[ENTRY:.*]]:
16+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
17+
; CHECK: [[VECTOR_PH]]:
18+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
19+
; CHECK: [[VECTOR_BODY]]:
20+
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr null, align 1
21+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[TMP0]], i64 0
22+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
23+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[BROADCAST_SPLAT]], splat (i8 38)
24+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1)
25+
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> zeroinitializer, [[PREDPHI]]
26+
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
27+
; CHECK: [[MIDDLE_BLOCK]]:
28+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
29+
; CHECK-NEXT: br i1 true, label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
30+
; CHECK: [[SCALAR_PH]]:
31+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
32+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
33+
; CHECK: [[SW_BB6:.*]]:
34+
; CHECK-NEXT: br label %[[FOR_INC:.*]]
35+
; CHECK: [[FOR_BODY]]:
36+
; CHECK-NEXT: [[J_031:%.*]] = phi i64 [ [[INC12:%.*]], %[[FOR_INC]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
37+
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr null, align 1
38+
; CHECK-NEXT: switch i8 [[TMP4]], label %[[FOR_INC]] [
39+
; CHECK-NEXT: i8 62, label %[[FOR_INC]]
40+
; CHECK-NEXT: i8 1, label %[[FOR_INC]]
41+
; CHECK-NEXT: i8 38, label %[[SW_BB6]]
42+
; CHECK-NEXT: i8 0, label %[[FOR_INC]]
43+
; CHECK-NEXT: ]
44+
; CHECK: [[FOR_INC]]:
45+
; CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ 0, %[[SW_BB6]] ], [ 1, %[[FOR_BODY]] ], [ 1, %[[FOR_BODY]] ], [ 1, %[[FOR_BODY]] ], [ 1, %[[FOR_BODY]] ]
46+
; CHECK-NEXT: [[INC:%.*]] = or i64 0, [[DOTSINK]]
47+
; CHECK-NEXT: [[INC12]] = add i64 [[J_031]], 1
48+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[J_031]], 1
49+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
50+
; CHECK: [[FOR_END_LOOPEXIT]]:
51+
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i64 [ [[INC]], %[[FOR_INC]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
52+
; CHECK-NEXT: ret i64 [[INC_LCSSA]]
53+
;
54+
entry:
55+
br label %for.body
56+
57+
sw.bb6: ; preds = %for.body
58+
br label %for.inc
59+
60+
for.body: ; preds = %for.inc, %entry
61+
%j.031 = phi i64 [ %inc12, %for.inc ], [ 0, %entry ]
62+
%0 = load i8, ptr null, align 1
63+
switch i8 %0, label %for.inc [
64+
i8 62, label %for.inc
65+
i8 1, label %for.inc
66+
i8 38, label %sw.bb6
67+
i8 0, label %for.inc
68+
]
69+
70+
for.inc: ; preds = %for.body, %for.body, %for.body, %for.body, %sw.bb6
71+
%.sink = phi i64 [ 0, %sw.bb6 ], [ 1, %for.body ], [ 1, %for.body ], [ 1, %for.body ], [ 1, %for.body ]
72+
%inc = or i64 0, %.sink
73+
%inc12 = add i64 %j.031, 1
74+
%exitcond.not = icmp eq i64 %j.031, 1
75+
br i1 %exitcond.not, label %for.end.loopexit, label %for.body
76+
77+
for.end.loopexit: ; preds = %for.inc
78+
ret i64 %inc
79+
}
80+
81+
attributes #0 = { "target-features"="+v" }

0 commit comments

Comments
 (0)