[SLP]Support LShr as base for copyable elements #153393

alexey-bataev · 2025-08-13T11:20:26Z

Added support for LShr instructions as the base for copyable elements. Also
added a simple analysis that selects the best base instruction when multiple
candidates are available.

Created using spr 1.3.5

llvmbot · 2025-08-13T11:21:00Z

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Added support for LShr instructions as the base for copyable elements. Also
added a simple analysis that selects the best base instruction when multiple
candidates are available.

Full diff: https://github.com/llvm/llvm-project/pull/153393.diff

4 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+54-18)
(modified) llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll (+2-5)
(modified) llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll (+2-4)
(modified) llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll (+2-4)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3045eeb3eb48e..f71faa2e2a7d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10571,27 +10571,29 @@ class InstructionsCompatibilityAnalysis {
     BasicBlock *Parent = nullptr;
     // Checks if the instruction has supported opcode.
     auto IsSupportedOpcode = [&](Instruction *I) {
-      return I && I->getOpcode() == Instruction::Add &&
+      return I &&
+             (I->getOpcode() == Instruction::Add ||
+              I->getOpcode() == Instruction::LShr) &&
              (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
     };
     // Exclude operands instructions immediately to improve compile time, it
     // will be unable to schedule anyway.
     SmallDenseSet<Value *, 8> Operands;
+    SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
     for (Value *V : VL) {
       auto *I = dyn_cast<Instruction>(V);
       if (!I)
         continue;
       if (!DT.isReachableFromEntry(I->getParent()))
         continue;
-      if (!MainOp) {
-        MainOp = I;
+      if (Candidates.empty()) {
+        Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Parent = I->getParent();
         Operands.insert(I->op_begin(), I->op_end());
         continue;
       }
       if (Parent == I->getParent()) {
-        if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
-          MainOp = I;
+        Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Operands.insert(I->op_begin(), I->op_end());
         continue;
       }
@@ -10603,24 +10605,37 @@ class InstructionsCompatibilityAnalysis {
                  (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
              "Different nodes should have different DFS numbers");
       if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
-        MainOp = I;
+        Candidates.clear();
+        Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Parent = I->getParent();
         Operands.clear();
         Operands.insert(I->op_begin(), I->op_end());
       }
     }
-    if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
-      MainOp = nullptr;
-      return;
+    unsigned BestOpcodeNum = 0;
+    MainOp = nullptr;
+    for (const auto &P : Candidates) {
+      if (P.second.size() < BestOpcodeNum)
+        continue;
+      for (Instruction *I : P.second) {
+        if (IsSupportedOpcode(I) && !Operands.contains(I)) {
+          MainOp = I;
+          BestOpcodeNum = P.second.size();
+          break;
+        }
+      }
     }
-    MainOpcode = MainOp->getOpcode();
+    if (MainOp)
+      MainOpcode = MainOp->getOpcode();
   }
 
   /// Returns the idempotent value for the \p MainOp with the detected \p
   /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
   /// the operand itself, since V or V == V.
   Value *selectBestIdempotentValue() const {
-    assert(MainOpcode == Instruction::Add && "Unsupported opcode");
+    assert(
+        (MainOpcode == Instruction::Add || MainOpcode == Instruction::LShr) &&
+        "Unsupported opcode");
     return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
                                           !MainOp->isCommutative());
   }
@@ -10635,6 +10650,7 @@ class InstructionsCompatibilityAnalysis {
       return convertTo(cast<Instruction>(V), S).second;
     switch (MainOpcode) {
     case Instruction::Add:
+    case Instruction::LShr:
       return {V, selectBestIdempotentValue()};
     default:
       break;
@@ -10852,6 +10868,21 @@ class InstructionsCompatibilityAnalysis {
       }
       if (!Res)
         return InstructionsState::invalid();
+      constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
+      InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
+      InstructionCost VectorCost;
+      FixedVectorType *VecTy =
+          getWidenedType(S.getMainOp()->getType(), VL.size());
+      switch (MainOpcode) {
+      case Instruction::Add:
+      case Instruction::LShr:
+      VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
+        break;
+      default:
+        llvm_unreachable("Unexpected instruction.");
+      }
+      if (VectorCost > ScalarCost)
+        return InstructionsState::invalid();
       return S;
     }
     assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21064,6 +21095,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
         ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
         const auto *It = find(Op, CD->getInst());
         assert(It != Op.end() && "Lane not set");
+        SmallPtrSet<Instruction *, 4> Visited;
         do {
           int Lane = std::distance(Op.begin(), It);
           assert(Lane >= 0 && "Lane not set");
@@ -21085,13 +21117,15 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
                   (InsertInReadyList && UseSD->isReady()))
                 WorkList.push_back(UseSD);
             }
-          } else if (ScheduleData *UseSD = getScheduleData(In)) {
-            CD->incDependencies();
-            if (!UseSD->isScheduled())
-              CD->incrementUnscheduledDeps(1);
-            if (!UseSD->hasValidDependencies() ||
-                (InsertInReadyList && UseSD->isReady()))
-              WorkList.push_back(UseSD);
+          } else if (Visited.insert(In).second) {
+            if (ScheduleData *UseSD = getScheduleData(In)) {
+              CD->incDependencies();
+              if (!UseSD->isScheduled())
+                CD->incrementUnscheduledDeps(1);
+              if (!UseSD->hasValidDependencies() ||
+                  (InsertInReadyList && UseSD->isReady()))
+                WorkList.push_back(UseSD);
+            }
           }
           It = find(make_range(std::next(It), Op.end()), CD->getInst());
         } while (It != Op.end());
@@ -21845,6 +21879,8 @@ bool BoUpSLP::collectValuesToDemote(
       return all_of(E.Scalars, [&](Value *V) {
         if (isa<PoisonValue>(V))
           return true;
+        if (E.isCopyableElement(V))
+          return true;
         auto *I = cast<Instruction>(V);
         KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
         APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
index 8d44d03e0e5cc..6d961fc3378b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
@@ -8,11 +8,8 @@ define i32 @test(ptr %c) {
 ; CHECK-NEXT:    [[BITLEN:%.*]] = getelementptr i8, ptr [[C]], i64 136
 ; CHECK-NEXT:    [[INCDEC_PTR_3_1:%.*]] = getelementptr i8, ptr [[C]], i64 115
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[BITLEN]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <8 x i64> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
 ; CHECK-NEXT:    store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1
 ; CHECK-NEXT:    ret i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
index 4f94784a24dd4..c02ef8388b066 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
@@ -101,10 +101,8 @@ define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16)
 define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byval(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[X]], align 16
-; CHECK-NEXT:    [[T8:%.*]] = lshr i64 [[T1]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[T8]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[TMP2]], <i64 0, i64 32, i64 0, i64 0>
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[TMP5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
index 700e3ed9effc4..0545e5403f594 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -101,10 +101,8 @@ define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16)
 define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byval(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[X]], align 16
-; CHECK-NEXT:    [[T8:%.*]] = lshr i64 [[T1]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[T8]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[TMP2]], <i64 0, i64 32, i64 0, i64 0>
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[TMP5]]

github-actions · 2025-08-13T11:23:52Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Created using spr 1.3.5

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Created using spr 1.3.5

RKSimon

LGTM

RKSimon · 2025-08-14T15:53:24Z

update with trunk to clear the CI error?

Created using spr 1.3.5 [skip ci]

Created using spr 1.3.5

Created using spr 1.3.5 [skip ci]

Created using spr 1.3.5

Added support for LShr instructions as the base for copyable elements. Also added a simple analysis that selects the best base instruction when multiple candidates are available.

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: llvm/llvm-project#153393

asb · 2025-08-14T21:17:36Z

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335

Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

This reverts commit ca4ebf9, which causes compile-time crashes for some inputs with RVV zvl512b/zvl1024b configurations. See here for a minimal reproducer: #153393 (comment)

asb · 2025-08-14T21:26:27Z

I've landed a revert to get the bots green again. I'll add a zvl512b/zvl1024b config to the gauntlet bot tomorrow.

alexey-bataev · 2025-08-14T21:29:32Z

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335

Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

This is a codegen crash, @topperc @preames could you check?

durin42 · 2025-08-14T21:31:37Z

We also saw a segfault while building a stage2 rustc that we root-caused to this commit, but I don't have time to reduce it - hopefully it's related to the other report.

This reverts commit ca4ebf9, which causes compile-time crashes for some inputs with RVV zvl512b/zvl1024b configurations. See here for a minimal reproducer: llvm/llvm-project#153393 (comment)

mshockwave · 2025-08-14T21:56:57Z

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335
Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

This is a codegen crash, @topperc @preames could you check?

I can take a look

mshockwave · 2025-08-14T22:57:03Z

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335

Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

Candidate PR: #153709

To verify, I reapplied this patch locally and confirmed that my patch could fix the issue here.

mshockwave · 2025-08-14T23:22:27Z

While the DAGCombiner was wrong, I have a meta question for @alexey-bataev: is it expected for SLP to generate <19 x i16> in the first place? I thought it would try to avoid illegal types.

alexey-bataev · 2025-08-14T23:23:58Z

What is illegal here? It tries to emit long vectors, relying (for now) on the backend to split them.

Reported from #153393 (comment). During DAGCombine, an intermediate extract_subvector sequence was generated:

```
t8: v9i16 = extract_subvector t3, Constant:i64<9>
t24: v8i16 = extract_subvector t8, Constant:i64<0>
```

One of the DAGCombine rules, which turns `(extract_subvector (extract_subvector X, C), 0)` into `(extract_subvector X, C)`, kicked in and turned that into `v8i16 = extract_subvector t3, Constant:i64<9>`. However, the rule forgot to check whether the extracted index is a multiple of the minimum vector length of the result type, hence the crash. This patch fixes this by adding an additional check.

mshockwave · 2025-08-15T00:08:26Z

> What is illegal here? It tries to emit long vectors, relying (for now) on the backend to split them.

<19 x i16> is considered an illegal type for the RISCV backend, and since we need to spend extra instructions to split it (as you also mentioned) during type legalization, I vaguely remember that the loop vectorizer would give it a higher cost and thus avoid such types. But maybe that's just LV.

… (#153709) Reported from llvm/llvm-project#153393 (comment). During DAGCombine, an intermediate extract_subvector sequence was generated:

```
t8: v9i16 = extract_subvector t3, Constant:i64<9>
t24: v8i16 = extract_subvector t8, Constant:i64<0>
```

One of the DAGCombine rules, which turns `(extract_subvector (extract_subvector X, C), 0)` into `(extract_subvector X, C)`, kicked in and turned that into `v8i16 = extract_subvector t3, Constant:i64<9>`. However, the rule forgot to check whether the extracted index is a multiple of the minimum vector length of the result type, hence the crash. This patch fixes this by adding an additional check.

alexey-bataev · 2025-08-15T01:22:50Z

> > What is illegal here? It tries to emit long vectors, relying (for now) on the backend to split them.

> <19 x i16> is considered an illegal type for the RISCV backend, and since we need to spend extra instructions to split it (as you also mentioned) during type legalization, I vaguely remember that the loop vectorizer would give it a higher cost and thus avoid such types. But maybe that's just LV.

LV is different here, at least now. There is a plan to fix it.

Added support for LShr instructions as a base for copyable elements. Also added a simple analysis for best base instruction selection when multiple candidates are available. Fixed scheduling after cancellation. Reviewers: hiraditya, RKSimon. Reviewed By: RKSimon. Pull Request: #153393

Added support for LShr instructions as a base for copyable elements. Also added a simple analysis for best base instruction selection when multiple candidates are available. Fixed scheduling after cancellation. Reviewers: hiraditya, RKSimon. Reviewed By: RKSimon. Pull Request: llvm/llvm-project#153393

aeubanks · 2025-08-15T21:38:42Z

hi, this introduces crashes, even at head after the fixes:

$ cat /tmp/a.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @_RNvMs0_NtCsjIjFnkOzZDy_6object5writeNtB5_6Object4emit() {
.loopexit381:
  %0 = trunc i64 0 to i32
  %1 = select i1 false, i32 %0, i32 0
  %2 = trunc i64 0 to i32
  %3 = select i1 false, i32 %2, i32 0
  %4 = add i32 %2, 0
  %5 = select i1 false, i32 %4, i32 0
  br label %6

6:                                                ; preds = %.loopexit381
  %7 = getelementptr i8, ptr null, i64 12
  store i32 %1, ptr %7, align 4
  %8 = getelementptr i8, ptr null, i64 16
  store i32 %1, ptr %8, align 4
  %9 = getelementptr i8, ptr null, i64 20
  store i32 %3, ptr %9, align 4
  %10 = getelementptr i8, ptr null, i64 24
  store i32 %5, ptr %10, align 4
  ret void
}
$ build/rel/bin/opt -p slp-vectorizer -disable-output /tmp/a.ll
opt: ../../llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:5048: int llvm::slpvectorizer::BoUpSLP::ScheduleCopyableData::incrementUnscheduledDeps(int): Assertion `UnscheduledDeps >= 0 && "invariant"' failed.

This reverts commit ca4ebf9. Causes compile-time crashes for some inputs with RVV zvl512b/zvl1024b configurations. See here for a minimal reproducer: llvm#153393 (comment)

Reported from llvm#153393 (comment). During DAGCombine, an intermediate extract_subvector sequence was generated:

```
t8: v9i16 = extract_subvector t3, Constant:i64<9>
t24: v8i16 = extract_subvector t8, Constant:i64<0>
```

One of the DAGCombine rules, which turns `(extract_subvector (extract_subvector X, C), 0)` into `(extract_subvector X, C)`, kicked in and turned that into `v8i16 = extract_subvector t3, Constant:i64<9>`. However, the rule forgot to check whether the extracted index is a multiple of the minimum vector length of the result type, hence the crash. This patch fixes this by adding an additional check.

alexey-bataev · 2025-08-16T12:16:26Z

hi, this introduces crashes, even at head after the fixes:

$ cat /tmp/a.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @_RNvMs0_NtCsjIjFnkOzZDy_6object5writeNtB5_6Object4emit() {
.loopexit381:
  %0 = trunc i64 0 to i32
  %1 = select i1 false, i32 %0, i32 0
  %2 = trunc i64 0 to i32
  %3 = select i1 false, i32 %2, i32 0
  %4 = add i32 %2, 0
  %5 = select i1 false, i32 %4, i32 0
  br label %6

6:                                                ; preds = %.loopexit381
  %7 = getelementptr i8, ptr null, i64 12
  store i32 %1, ptr %7, align 4
  %8 = getelementptr i8, ptr null, i64 16
  store i32 %1, ptr %8, align 4
  %9 = getelementptr i8, ptr null, i64 20
  store i32 %3, ptr %9, align 4
  %10 = getelementptr i8, ptr null, i64 24
  store i32 %5, ptr %10, align 4
  ret void
}
$ build/rel/bin/opt -p slp-vectorizer -disable-output /tmp/a.ll
opt: ../../llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:5048: int llvm::slpvectorizer::BoUpSLP::ScheduleCopyableData::incrementUnscheduledDeps(int): Assertion `UnscheduledDeps >= 0 && "invariant"' failed.

Checked the most recent version, cannot reproduce it

mikaelholmen · 2025-08-18T12:11:09Z

Hi @alexey-bataev

Another crash here with this patch:
opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null

Result:

opt: ../lib/Transforms/Vectorize/SLPVectorizer.cpp:20853: auto llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *>, BoUpSLP *, const InstructionsState &, const EdgeInfo &)::(anonymous class)::operator()(bool, ScheduleBundle &) const: Assertion `Picked->isReady() && "must be ready to schedule"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null
1.	Running pass "function(slp-vectorizer)" on module "bbi-109672.ll"
2.	Running pass "slp-vectorizer" on function "main"
 #0 0x0000556b1173f286 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x47ee286)
 #1 0x0000556b1173c815 llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x47eb815)
 #2 0x0000556b11740409 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f448d3bb990 __restore_rt (/lib64/libpthread.so.0+0x12990)
 #4 0x00007f448ad5b52f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f448ad2ee65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f448ad2ed39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f448ad53e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000556b1327ae22 llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP*, (anonymous namespace)::InstructionsState const&, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&)::$_1::operator()(bool, llvm::slpvectorizer::BoUpSLP::ScheduleBundle&) const SLPVectorizer.cpp:0:0
 #9 0x0000556b131df805 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628e805)
#10 0x0000556b131dcf36 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bf36)
#11 0x0000556b131dcb46 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bb46)
#12 0x0000556b131fc1ae llvm::slpvectorizer::BoUpSLP::transformNodes() (build-all/bin/opt+0x62ab1ae)
#13 0x0000556b1328b819 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::DataLayout const&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&, llvm::AssumptionCache*) SLPVectorizer.cpp:0:0
#14 0x0000556b1324b367 llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) (build-all/bin/opt+0x62fa367)
#15 0x0000556b1324ca02 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62fba02)
#16 0x0000556b13240f9c llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62eff9c)
#17 0x0000556b1323db86 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (build-all/bin/opt+0x62ecb86)
#18 0x0000556b1323d0f7 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x62ec0f7)
#19 0x0000556b12c0d63d llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#20 0x0000556b119594d5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4a084d5)
#21 0x0000556b12c0977d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#22 0x0000556b1195e08e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a0d08e)
#23 0x0000556b12b98e1d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) NewPMDriver.cpp:0:0
#24 0x0000556b119581c5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a071c5)
#25 0x0000556b12b91c64 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5c40c64)
#26 0x0000556b116e0a78 optMain (build-all/bin/opt+0x478fa78)
#27 0x00007f448ad477e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#28 0x0000556b116de4ae _start (build-all/bin/opt+0x478d4ae)
Abort (core dumped)

I've tested against latest trunk 673750f and it still crashes there.

bbi-109672.ll.gz

sjoerdmeijer · 2025-08-18T13:25:45Z

Came here to say the same thing as @mikaelholmen.
I see the same crash with the same backtrace.

aeubanks · 2025-08-18T16:29:33Z

Checked the most recent version, cannot reproduce it

this seems to have been fixed with 758c685

mikaelholmen · 2025-08-20T04:54:53Z

Hi @alexey-bataev

Another crash here with this patch: opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null

Result:

opt: ../lib/Transforms/Vectorize/SLPVectorizer.cpp:20853: auto llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *>, BoUpSLP *, const InstructionsState &, const EdgeInfo &)::(anonymous class)::operator()(bool, ScheduleBundle &) const: Assertion `Picked->isReady() && "must be ready to schedule"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null
1.	Running pass "function(slp-vectorizer)" on module "bbi-109672.ll"
2.	Running pass "slp-vectorizer" on function "main"
 #0 0x0000556b1173f286 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x47ee286)
 #1 0x0000556b1173c815 llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x47eb815)
 #2 0x0000556b11740409 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f448d3bb990 __restore_rt (/lib64/libpthread.so.0+0x12990)
 #4 0x00007f448ad5b52f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f448ad2ee65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f448ad2ed39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f448ad53e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000556b1327ae22 llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP*, (anonymous namespace)::InstructionsState const&, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&)::$_1::operator()(bool, llvm::slpvectorizer::BoUpSLP::ScheduleBundle&) const SLPVectorizer.cpp:0:0
 #9 0x0000556b131df805 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628e805)
#10 0x0000556b131dcf36 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bf36)
#11 0x0000556b131dcb46 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bb46)
#12 0x0000556b131fc1ae llvm::slpvectorizer::BoUpSLP::transformNodes() (build-all/bin/opt+0x62ab1ae)
#13 0x0000556b1328b819 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::DataLayout const&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&, llvm::AssumptionCache*) SLPVectorizer.cpp:0:0
#14 0x0000556b1324b367 llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) (build-all/bin/opt+0x62fa367)
#15 0x0000556b1324ca02 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62fba02)
#16 0x0000556b13240f9c llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62eff9c)
#17 0x0000556b1323db86 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (build-all/bin/opt+0x62ecb86)
#18 0x0000556b1323d0f7 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x62ec0f7)
#19 0x0000556b12c0d63d llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#20 0x0000556b119594d5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4a084d5)
#21 0x0000556b12c0977d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#22 0x0000556b1195e08e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a0d08e)
#23 0x0000556b12b98e1d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) NewPMDriver.cpp:0:0
#24 0x0000556b119581c5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a071c5)
#25 0x0000556b12b91c64 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5c40c64)
#26 0x0000556b116e0a78 optMain (build-all/bin/opt+0x478fa78)
#27 0x00007f448ad477e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#28 0x0000556b116de4ae _start (build-all/bin/opt+0x478d4ae)
Abort (core dumped)

I've tested against latest trunk 673750f and it still crashes there.

bbi-109672.ll.gz

Ping @alexey-bataev

This still happens at latest trunk, c6fbd12.

alexey-bataev · 2025-08-20T11:04:47Z

Hi, sorry for the delay. I don't have access to a computer this week; I will fix it next week.

[𝘀𝗽𝗿] initial version

95b5a29

Created using spr 1.3.5

llvmbot added vectorizers llvm:transforms labels Aug 13, 2025

alexey-bataev requested review from hiraditya and RKSimon August 13, 2025 11:20

Fix formatting

f1917c4

Created using spr 1.3.5

RKSimon reviewed Aug 13, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Show resolved Hide resolved

Address comments

aa9d260

Created using spr 1.3.5

RKSimon approved these changes Aug 14, 2025

View reviewed changes

alexey-bataev added 2 commits August 14, 2025 16:00

[𝘀𝗽𝗿] changes introduced through rebase

13be490

Created using spr 1.3.5 [skip ci]

Rebase

dac1fe0

Created using spr 1.3.5

alexey-bataev changed the base branch from main to users/alexey-bataev/spr/main.slpsupport-lshr-as-base-for-copyable-elements August 14, 2025 16:00

alexey-bataev added 2 commits August 14, 2025 16:02

[𝘀𝗽𝗿] changes introduced through rebase

8664542

Created using spr 1.3.5 [skip ci]

Rebase

d09c054

Created using spr 1.3.5

alexey-bataev changed the base branch from users/alexey-bataev/spr/main.slpsupport-lshr-as-base-for-copyable-elements to main August 14, 2025 16:35

alexey-bataev merged commit ca4ebf9 into main Aug 14, 2025
11 of 12 checks passed

alexey-bataev deleted the users/alexey-bataev/spr/slpsupport-lshr-as-base-for-copyable-elements branch August 14, 2025 16:35

mshockwave mentioned this pull request Aug 14, 2025

[DAGCombine] Fix an incorrect folding of extract_subvector #153709

Merged

[SLP]Support LShr as base for copyable elements #153393

[SLP]Support LShr as base for copyable elements #153393

Uh oh!

Conversation

alexey-bataev commented Aug 13, 2025

Uh oh!

llvmbot commented Aug 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Aug 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

RKSimon commented Aug 14, 2025

Uh oh!

Uh oh!

asb commented Aug 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

asb commented Aug 14, 2025

Uh oh!

alexey-bataev commented Aug 14, 2025

Uh oh!

durin42 commented Aug 14, 2025

Uh oh!

mshockwave commented Aug 14, 2025

Uh oh!

mshockwave commented Aug 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

mshockwave commented Aug 14, 2025

Uh oh!

alexey-bataev commented Aug 14, 2025

Uh oh!

mshockwave commented Aug 15, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

alexey-bataev commented Aug 15, 2025

Uh oh!

aeubanks commented Aug 15, 2025

Uh oh!

alexey-bataev commented Aug 16, 2025

Uh oh!

mikaelholmen commented Aug 18, 2025

Uh oh!

sjoerdmeijer commented Aug 18, 2025

Uh oh!

aeubanks commented Aug 18, 2025

Uh oh!

mikaelholmen commented Aug 20, 2025

Uh oh!

alexey-bataev commented Aug 20, 2025

Uh oh!

Uh oh!

llvmbot commented Aug 13, 2025 •

edited

Loading

github-actions bot commented Aug 13, 2025 •

edited

Loading

asb commented Aug 14, 2025 •

edited

Loading

mshockwave commented Aug 14, 2025 •

edited

Loading

mshockwave commented Aug 15, 2025 •

edited

Loading