Skip to content

Commit 247d3ea

Browse files
committed
[SLP] Expand non-power-of-two bailout in TryToFindDuplicates
This fixes a crash noticed when doing a downstream merge. The test case has been reduced, and is included in this commit. The existing bailout for non-power-of-two vectors in TryToFindDuplicates did not consider the case where the list being vectorized had no root node. This allowed reshuffled scalars to slip through to code which does not yet expect to handle them. This was an existing bug (likely introduced by my ed03070), but made easier to hit by 63e8a1b.
1 parent 4634a48 commit 247d3ea

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6989,7 +6989,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
69896989
ReuseShuffleIndices.clear();
69906990
} else {
69916991
// FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
6992-
if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
6992+
if ((UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) ||
6993+
!llvm::has_single_bit(VL.size())) {
69936994
LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
69946995
"for nodes with padding.\n");
69956996
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,36 @@ define double @dot_product_fp64(ptr %a, ptr %b) {
762762
ret double %add.1
763763
}
764764

765+
;; Covers a case where SLP would previously crash due to a
766+
;; missing bailout in TryToFindDuplicates for the case
767+
;; where a VL=3 list was vectorized directly (without
768+
;; a root instruction such as a store or reduce).
769+
define double @no_root_reshuffle(ptr %ptr) {
770+
; CHECK-LABEL: @no_root_reshuffle(
771+
; CHECK-NEXT: entry:
772+
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[PTR:%.*]], align 8
773+
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[TMP0]], [[TMP0]]
774+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
775+
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
776+
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
777+
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
778+
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
779+
; CHECK-NEXT: [[MUL6:%.*]] = fmul fast double [[TMP3]], [[TMP1]]
780+
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[MUL6]], [[MUL]]
781+
; CHECK-NEXT: ret double [[ADD]]
782+
;
783+
entry:
784+
%0 = load double, ptr %ptr, align 8
785+
%mul = fmul fast double %0, %0
786+
%arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 8
787+
%1 = load double, ptr %arrayidx2, align 8
788+
%arrayidx3 = getelementptr inbounds i8, ptr %ptr, i64 16
789+
%2 = load double, ptr %arrayidx3, align 8
790+
%3 = fmul fast double %2, %2
791+
%mul6 = fmul fast double %3, %1
792+
%add = fadd fast double %mul6, %mul
793+
ret double %add
794+
}
765795

766796
declare float @llvm.fmuladd.f32(float, float, float)
767797

0 commit comments

Comments
 (0)