Skip to content

Commit b4a0fd4

Browse files
committed
[SLP]Fix PR89635: do not try to vectorize single-gather alternate node.
There is no need to try to vectorize a graph that consists of a single gather/buildvector node with an alternate opcode; it is not profitable. In other cases, the last instruction must be used as the insertion point for the vectorized code.
1 parent e0a763c commit b4a0fd4

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -9640,6 +9640,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
96409640
bool IsAllowedSingleBVNode =
96419641
VectorizableTree.size() > 1 ||
96429642
(VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() &&
9643+
!VectorizableTree.front()->isAltShuffle() &&
96439644
VectorizableTree.front()->getOpcode() != Instruction::PHI &&
96449645
VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr &&
96459646
allSameBlock(VectorizableTree.front()->Scalars));
@@ -11032,7 +11033,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1103211033
isUsedOutsideBlock(V);
1103311034
}) ||
1103411035
(E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
11035-
all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>)))
11036+
all_of(E->Scalars, [](Value *V) {
11037+
return isa<ExtractElementInst, UndefValue>(V) ||
11038+
areAllOperandsNonInsts(V);
11039+
})))
1103611040
Res.second = FindLastInst();
1103711041
else
1103811042
Res.second = FindFirstInst();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define <2 x i32> @test(i32 %arg) {
5+
; CHECK-LABEL: define <2 x i32> @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) {
7+
; CHECK-NEXT: bb:
8+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0
9+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
10+
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
11+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
12+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
13+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
14+
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
15+
;
16+
bb:
17+
%or = or i32 %arg, 0
18+
%mul = mul i32 0, 1
19+
%mul1 = mul i32 %or, %mul
20+
%cmp = icmp ugt i32 0, %mul1
21+
%0 = insertelement <2 x i32> poison, i32 %or, i32 0
22+
%1 = insertelement <2 x i32> %0, i32 %mul, i32 1
23+
ret <2 x i32> %1
24+
}
25+

0 commit comments

Comments
 (0)