Skip to content

Commit bbd1bb4

Browse files
committed
[SLP] Set insert point for split node with non-schedulable instructions after the last instruction
The insert point for non-schedulable instructions in a SplitVectorize node needs to be set after the last instruction, not before it, to avoid a crash in the case of a buildvector subvector node.
1 parent 9387281 commit bbd1bb4

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -14874,7 +14874,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
1487414874
bool IsPHI = isa<PHINode>(LastInst);
1487514875
if (IsPHI)
1487614876
LastInstIt = LastInst->getParent()->getFirstNonPHIIt();
14877-
if (IsPHI || (!E->isGather() && doesNotNeedToSchedule(E->Scalars)) ||
14877+
if (IsPHI ||
14878+
(!E->isGather() && E->State != TreeEntry::SplitVectorize &&
14879+
doesNotNeedToSchedule(E->Scalars)) ||
1487814880
(GatheredLoadsEntriesFirst.has_value() &&
1487914881
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
1488014882
E->getOpcode() == Instruction::Load)) {
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i64 @test(i256 %0, { i32, i1 } %1) {
5+
; CHECK-LABEL: define i64 @test(
6+
; CHECK-SAME: i256 [[TMP0:%.*]], { i32, i1 } [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
9+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i256 [[TMP0]] to i32
10+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
11+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <2 x i32> zeroinitializer
12+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
13+
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
14+
; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
15+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
16+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP8]], i32 1
17+
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 2
18+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 1>
19+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
20+
; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP13]], <4 x i32> [[TMP12]], i64 4)
21+
; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i32> [[TMP14]], zeroinitializer
22+
; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i1> [[TMP15]] to i8
23+
; CHECK-NEXT: [[TMP17:%.*]] = call i8 @llvm.ctpop.i8(i8 [[TMP16]])
24+
; CHECK-NEXT: [[TMP18:%.*]] = zext i8 [[TMP17]] to i64
25+
; CHECK-NEXT: ret i64 [[TMP18]]
26+
;
27+
entry:
28+
%2 = extractvalue { i32, i1 } %1, 0
29+
%cmp = icmp ne i32 %2, 0
30+
%cond = zext i1 %cmp to i64
31+
%conv = trunc i256 %0 to i32
32+
%cmp8 = icmp ne i32 %conv, 0
33+
%cond10 = zext i1 %cmp8 to i64
34+
%3 = add i64 %cond10, %cond
35+
%cmp24 = icmp ne i32 %conv, 0
36+
%cond26 = zext i1 %cmp24 to i64
37+
%4 = add i64 %3, %cond26
38+
%5 = extractvalue { i32, i1 } %1, 0
39+
%cmp42 = icmp ne i32 %5, 0
40+
%cond44 = zext i1 %cmp42 to i64
41+
%6 = add i64 %4, %cond44
42+
%conv47 = trunc i256 %0 to i32
43+
%cmp54 = icmp ne i32 %conv47, 0
44+
%cond56 = zext i1 %cmp54 to i64
45+
%7 = add i64 %6, %cond56
46+
%cmp70 = icmp ne i32 %conv47, 0
47+
%cond72 = zext i1 %cmp70 to i64
48+
%8 = add i64 %7, %cond72
49+
%9 = extractvalue { i32, i1 } %1, 0
50+
%cmp87 = icmp ne i32 %9, 0
51+
%cond89 = zext i1 %cmp87 to i64
52+
%10 = add i64 %8, %cond89
53+
%cmp92 = icmp ne i32 %9, 0
54+
%cond94 = zext i1 %cmp92 to i64
55+
%11 = add i64 %10, %cond94
56+
ret i64 %11
57+
}

0 commit comments

Comments
 (0)