Skip to content

Conversation

alexey-bataev
Copy link
Member

Added support for LShr instructions as base for copyable elements. Also,
added simple analysis for best base instruction selection, if multiple
candidates are available.

Created using spr 1.3.5
@llvmbot
Copy link
Member

llvmbot commented Aug 13, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Added support for LShr instructions as base for copyable elements. Also,
added simple analysis for best base instruction selection, if multiple
candidates are available.


Full diff: https://github.com/llvm/llvm-project/pull/153393.diff

4 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+54-18)
  • (modified) llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll (+2-5)
  • (modified) llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll (+2-4)
  • (modified) llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll (+2-4)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3045eeb3eb48e..f71faa2e2a7d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10571,27 +10571,29 @@ class InstructionsCompatibilityAnalysis {
     BasicBlock *Parent = nullptr;
     // Checks if the instruction has supported opcode.
     auto IsSupportedOpcode = [&](Instruction *I) {
-      return I && I->getOpcode() == Instruction::Add &&
+      return I &&
+             (I->getOpcode() == Instruction::Add ||
+              I->getOpcode() == Instruction::LShr) &&
              (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
     };
     // Exclude operands instructions immediately to improve compile time, it
     // will be unable to schedule anyway.
     SmallDenseSet<Value *, 8> Operands;
+    SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
     for (Value *V : VL) {
       auto *I = dyn_cast<Instruction>(V);
       if (!I)
         continue;
       if (!DT.isReachableFromEntry(I->getParent()))
         continue;
-      if (!MainOp) {
-        MainOp = I;
+      if (Candidates.empty()) {
+        Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Parent = I->getParent();
         Operands.insert(I->op_begin(), I->op_end());
         continue;
       }
       if (Parent == I->getParent()) {
-        if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
-          MainOp = I;
+        Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Operands.insert(I->op_begin(), I->op_end());
         continue;
       }
@@ -10603,24 +10605,37 @@ class InstructionsCompatibilityAnalysis {
                  (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
              "Different nodes should have different DFS numbers");
       if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
-        MainOp = I;
+        Candidates.clear();
+        Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
         Parent = I->getParent();
         Operands.clear();
         Operands.insert(I->op_begin(), I->op_end());
       }
     }
-    if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
-      MainOp = nullptr;
-      return;
+    unsigned BestOpcodeNum = 0;
+    MainOp = nullptr;
+    for (const auto &P : Candidates) {
+      if (P.second.size() < BestOpcodeNum)
+        continue;
+      for (Instruction *I : P.second) {
+        if (IsSupportedOpcode(I) && !Operands.contains(I)) {
+          MainOp = I;
+          BestOpcodeNum = P.second.size();
+          break;
+        }
+      }
     }
-    MainOpcode = MainOp->getOpcode();
+    if (MainOp)
+      MainOpcode = MainOp->getOpcode();
   }
 
   /// Returns the idempotent value for the \p MainOp with the detected \p
   /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
   /// the operand itself, since V or V == V.
   Value *selectBestIdempotentValue() const {
-    assert(MainOpcode == Instruction::Add && "Unsupported opcode");
+    assert(
+        (MainOpcode == Instruction::Add || MainOpcode == Instruction::LShr) &&
+        "Unsupported opcode");
     return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
                                           !MainOp->isCommutative());
   }
@@ -10635,6 +10650,7 @@ class InstructionsCompatibilityAnalysis {
       return convertTo(cast<Instruction>(V), S).second;
     switch (MainOpcode) {
     case Instruction::Add:
+    case Instruction::LShr:
       return {V, selectBestIdempotentValue()};
     default:
       break;
@@ -10852,6 +10868,21 @@ class InstructionsCompatibilityAnalysis {
       }
       if (!Res)
         return InstructionsState::invalid();
+      constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
+      InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
+      InstructionCost VectorCost;
+      FixedVectorType *VecTy =
+          getWidenedType(S.getMainOp()->getType(), VL.size());
+      switch (MainOpcode) {
+      case Instruction::Add:
+      case Instruction::LShr:
+      VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
+        break;
+      default:
+        llvm_unreachable("Unexpected instruction.");
+      }
+      if (VectorCost > ScalarCost)
+        return InstructionsState::invalid();
       return S;
     }
     assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21064,6 +21095,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
         ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
         const auto *It = find(Op, CD->getInst());
         assert(It != Op.end() && "Lane not set");
+        SmallPtrSet<Instruction *, 4> Visited;
         do {
           int Lane = std::distance(Op.begin(), It);
           assert(Lane >= 0 && "Lane not set");
@@ -21085,13 +21117,15 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
                   (InsertInReadyList && UseSD->isReady()))
                 WorkList.push_back(UseSD);
             }
-          } else if (ScheduleData *UseSD = getScheduleData(In)) {
-            CD->incDependencies();
-            if (!UseSD->isScheduled())
-              CD->incrementUnscheduledDeps(1);
-            if (!UseSD->hasValidDependencies() ||
-                (InsertInReadyList && UseSD->isReady()))
-              WorkList.push_back(UseSD);
+          } else if (Visited.insert(In).second) {
+            if (ScheduleData *UseSD = getScheduleData(In)) {
+              CD->incDependencies();
+              if (!UseSD->isScheduled())
+                CD->incrementUnscheduledDeps(1);
+              if (!UseSD->hasValidDependencies() ||
+                  (InsertInReadyList && UseSD->isReady()))
+                WorkList.push_back(UseSD);
+            }
           }
           It = find(make_range(std::next(It), Op.end()), CD->getInst());
         } while (It != Op.end());
@@ -21845,6 +21879,8 @@ bool BoUpSLP::collectValuesToDemote(
       return all_of(E.Scalars, [&](Value *V) {
         if (isa<PoisonValue>(V))
           return true;
+        if (E.isCopyableElement(V))
+          return true;
         auto *I = cast<Instruction>(V);
         KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
         APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
index 8d44d03e0e5cc..6d961fc3378b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
@@ -8,11 +8,8 @@ define i32 @test(ptr %c) {
 ; CHECK-NEXT:    [[BITLEN:%.*]] = getelementptr i8, ptr [[C]], i64 136
 ; CHECK-NEXT:    [[INCDEC_PTR_3_1:%.*]] = getelementptr i8, ptr [[C]], i64 115
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[BITLEN]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <8 x i64> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
 ; CHECK-NEXT:    store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1
 ; CHECK-NEXT:    ret i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
index 4f94784a24dd4..c02ef8388b066 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
@@ -101,10 +101,8 @@ define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16)
 define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byval(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[X]], align 16
-; CHECK-NEXT:    [[T8:%.*]] = lshr i64 [[T1]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[T8]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[TMP2]], <i64 0, i64 32, i64 0, i64 0>
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[TMP5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
index 700e3ed9effc4..0545e5403f594 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -101,10 +101,8 @@ define <4 x float> @PR16739_byref_alt(ptr nocapture readonly dereferenceable(16)
 define <4 x float> @PR16739_byval(ptr nocapture readonly dereferenceable(16) %x) {
 ; CHECK-LABEL: @PR16739_byval(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[X:%.*]], align 16
-; CHECK-NEXT:    [[T1:%.*]] = load i64, ptr [[X]], align 16
-; CHECK-NEXT:    [[T8:%.*]] = lshr i64 [[T1]], 32
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[T8]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[TMP2]], <i64 0, i64 32, i64 0, i64 0>
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
 ; CHECK-NEXT:    ret <4 x float> [[TMP5]]

Copy link

github-actions bot commented Aug 13, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Created using spr 1.3.5
Created using spr 1.3.5
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon
Copy link
Collaborator

RKSimon commented Aug 14, 2025

update with trunk to clear the CI error?

Created using spr 1.3.5

[skip ci]
Created using spr 1.3.5
@alexey-bataev alexey-bataev changed the base branch from main to users/alexey-bataev/spr/main.slpsupport-lshr-as-base-for-copyable-elements August 14, 2025 16:00
Created using spr 1.3.5

[skip ci]
Created using spr 1.3.5
@alexey-bataev alexey-bataev changed the base branch from users/alexey-bataev/spr/main.slpsupport-lshr-as-base-for-copyable-elements to main August 14, 2025 16:35
@alexey-bataev alexey-bataev merged commit ca4ebf9 into main Aug 14, 2025
11 of 12 checks passed
@alexey-bataev alexey-bataev deleted the users/alexey-bataev/spr/slpsupport-lshr-as-base-for-copyable-elements branch August 14, 2025 16:35
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 14, 2025
Added support for LShr instructions as base for copyable elements. Also,
added simple analysis for best base instruction selection, if multiple
candidates are available.

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: llvm/llvm-project#153393
@asb
Copy link
Contributor

asb commented Aug 14, 2025

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335

Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

asb added a commit that referenced this pull request Aug 14, 2025
This reverts commit ca4ebf9.

Causes compile-time crashes for some inputs with RVV zvl512b/zvl1024b
configurations. See here for a minimal reproducer:
#153393 (comment)
@asb
Copy link
Contributor

asb commented Aug 14, 2025

I've landed a revert to get the bots green again. I'll add a zvl512b/zvl1024b config to the gauntlet bot tomorrow.

@alexey-bataev
Copy link
Member Author

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335

Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

This is a codegen crash, @topperc @preames could you check?

@durin42
Copy link
Contributor

durin42 commented Aug 14, 2025

We also saw a segfault building a stage2 rustc that root-caused to this, but I don't have time to try and reduce - hopefully it's related to the other report.

llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 14, 2025
This reverts commit ca4ebf9.

Causes compile-time crashes for some inputs with RVV zvl512b/zvl1024b
configurations. See here for a minimal reproducer:
llvm/llvm-project#153393 (comment)
@mshockwave
Copy link
Member

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335
Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

This is a codegen crash, @topperc @preames could you check?

I can take a look

@mshockwave
Copy link
Member

mshockwave commented Aug 14, 2025

This caused a compile-time crash for zvl512b and zvl1024b RVV configurations (hopefully you got an email notification for failure on clang-riscv-rva23-zvl512b-2stage and clang-riscv-rva23-zvl1024b-2stage ?). I've confirmed this commit is the culprit via bisection. Here is an example failure on the buildbots https://lab.llvm.org/buildbot/#/builders/212/builds/335

Here is a reduced test case:

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define <8 x i16> @gsm_encode(ptr %s) {
entry:
  %0 = load <19 x i16>, ptr %s, align 2
  %1 = shufflevector <19 x i16> zeroinitializer, <19 x i16> %0, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 28, i32 31, i32 poison, i32 poison>
  %2 = shufflevector <9 x i16> %1, <9 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
  ret <8 x i16> %2
}

And the error:

]$ ./build/rvrel/bin/llc -O3 < reduced.ll -mattr=+rva23u64,+zvl512b
	.attribute	4, 16
	.attribute	5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_b1p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zaamo1p0_zalrsc1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcd1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0_supm1p0"
	.file	"<stdin>"
llc: ../../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:25828: SDValue narrowExtractedVectorLoad(EVT, SDValue, unsigned int, const SDLoc &, SelectionDAG &): Assertion `Index % NumElts == 0 && "The extract subvector index is not a " "multiple of the result's element count"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: ./build/rvrel/bin/llc -O3 -mattr=+rva23u64,+zvl512b
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@gsm_encode'
 #0 0x00006079cc8cfae6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (./build/rvrel/bin/llc+0x3efeae6)
 #1 0x00006079cc8cd085 llvm::sys::RunSignalHandlers() (./build/rvrel/bin/llc+0x3efc085)
 #2 0x00006079cc8d0944 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007880bce4bcd0 (/usr/lib/libc.so.6+0x3dcd0)
 #4 0x00007880bcea5624 (/usr/lib/libc.so.6+0x97624)
 #5 0x00007880bce4bba0 raise (/usr/lib/libc.so.6+0x3dba0)
 #6 0x00007880bce33582 abort (/usr/lib/libc.so.6+0x25582)
 #7 0x00007880bce334eb __assert_perror_fail (/usr/lib/libc.so.6+0x254eb)
 #8 0x00006079cc4cd527 (anonymous namespace)::DAGCombiner::visitEXTRACT_SUBVECTOR(llvm::SDNode*) DAGCombiner.cpp:0:0
 #9 0x00006079cc47f006 (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) DAGCombiner.cpp:0:0
#10 0x00006079cc47c8b9 llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) (./build/rvrel/bin/llc+0x3aab8b9)
#11 0x00006079cc69b56c llvm::SelectionDAGISel::CodeGenAndEmitDAG() (./build/rvrel/bin/llc+0x3cca56c)
#12 0x00006079cc69a756 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (./build/rvrel/bin/llc+0x3cc9756)
#13 0x00006079cc6975d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc65d1)
#14 0x00006079cc694f49 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (./build/rvrel/bin/llc+0x3cc3f49)
#15 0x00006079cb842ce3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x2e71ce3)
#16 0x00006079cbdece19 llvm::FPPassManager::runOnFunction(llvm::Function&) (./build/rvrel/bin/llc+0x341be19)
#17 0x00006079cbdf54d2 llvm::FPPassManager::runOnModule(llvm::Module&) (./build/rvrel/bin/llc+0x34244d2)
#18 0x00006079cbded8a2 llvm::legacy::PassManagerImpl::run(llvm::Module&) (./build/rvrel/bin/llc+0x341c8a2)
#19 0x00006079cac87418 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#20 0x00006079cac84970 main (./build/rvrel/bin/llc+0x22b3970)
#21 0x00007880bce35488 (/usr/lib/libc.so.6+0x27488)
#22 0x00007880bce3554c __libc_start_main (/usr/lib/libc.so.6+0x2754c)
#23 0x00006079cac808a5 _start (./build/rvrel/bin/llc+0x22af8a5)
Aborted (core dumped)

This should have been catchable on the faster gauntlet bot but I need to add zvl512b/zvl1024b to its test matrix.

Candidate PR: #153709

To verify, I reapplied this patch locally and confirmed that my patch could fix the issue here.

@mshockwave
Copy link
Member

While the DAGCombiner was wrong, I have a meta question for @alexey-bataev: is it expected for SLP to generate <19 x i16> in the first place? I thought it would try to avoid illegal types.

@alexey-bataev
Copy link
Member Author

What is illegal here? It tries to emit long vectors, relying on the backend (for now) to split them.

mshockwave added a commit that referenced this pull request Aug 14, 2025
Reported from
#153393 (comment)

During DAGCombine, an intermediate extract_subvector sequence was
generated:
```
  t8: v9i16 = extract_subvector t3, Constant:i64<9>
t24: v8i16 = extract_subvector t8, Constant:i64<0>
```
And one of the DAGCombine rules, which turns `(extract_subvector
(extract_subvector X, C), 0)` into `(extract_subvector X, C)`, kicked in
and turned that into `v8i16 = extract_subvector t3, Constant:i64<9>`. But
it forgot to check if the extracted index is a multiple of the minimum
vector length of the result type, hence the crash.

This patch fixes this by adding an additional check.
@mshockwave
Copy link
Member

mshockwave commented Aug 15, 2025

What is illegal here? It tries to emit long vectors, relying on the backend (for now) to split them.

<19 x i16> is considered an illegal type for the RISCV backend, and since we need to spend extra instructions to split it (as you also mentioned) during type legalization, I vaguely remember that the loop vectorizer would give it a higher cost and thus avoid such types. But maybe that's just LV.

llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 15, 2025
… (#153709)

Reported from
llvm/llvm-project#153393 (comment)

During DAGCombine, an intermediate extract_subvector sequence was
generated:
```
  t8: v9i16 = extract_subvector t3, Constant:i64<9>
t24: v8i16 = extract_subvector t8, Constant:i64<0>
```
And one of the DAGCombine rules, which turns `(extract_subvector
(extract_subvector X, C), 0)` into `(extract_subvector X, C)`, kicked in
and turned that into `v8i16 = extract_subvector t3, Constant:i64<9>`. But
it forgot to check if the extracted index is a multiple of the minimum
vector length of the result type, hence the crash.

This patch fixes this by adding an additional check.
@alexey-bataev
Copy link
Member Author

What is illegal here? It tries to emit long vectors, relying on the backend (for now) to split them.

<19 x i16> is considered an illegal type for the RISCV backend, and since we need to spend extra instructions to split it (as you also mentioned) during type legalization, I vaguely remember that the loop vectorizer would give it a higher cost and thus avoid such types. But maybe that's just LV.

LV is different here, at least now. There is a plan to fix it.

alexey-bataev added a commit that referenced this pull request Aug 15, 2025
Added support for LShr instructions as base for copyable elements. Also,
added simple analysis for best base instruction selection, if multiple
candidates are available.

Fixed scheduling after cancellation

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: #153393
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 15, 2025
Added support for LShr instructions as base for copyable elements. Also,
added simple analysis for best base instruction selection, if multiple
candidates are available.

Fixed scheduling after cancellation

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: llvm/llvm-project#153393
@aeubanks
Copy link
Contributor

hi, this introduces crashes, even at head after the fixes:

$ cat /tmp/a.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @_RNvMs0_NtCsjIjFnkOzZDy_6object5writeNtB5_6Object4emit() {
.loopexit381:
  %0 = trunc i64 0 to i32
  %1 = select i1 false, i32 %0, i32 0
  %2 = trunc i64 0 to i32
  %3 = select i1 false, i32 %2, i32 0
  %4 = add i32 %2, 0
  %5 = select i1 false, i32 %4, i32 0
  br label %6

6:                                                ; preds = %.loopexit381
  %7 = getelementptr i8, ptr null, i64 12
  store i32 %1, ptr %7, align 4
  %8 = getelementptr i8, ptr null, i64 16
  store i32 %1, ptr %8, align 4
  %9 = getelementptr i8, ptr null, i64 20
  store i32 %3, ptr %9, align 4
  %10 = getelementptr i8, ptr null, i64 24
  store i32 %5, ptr %10, align 4
  ret void
}
$ build/rel/bin/opt -p slp-vectorizer -disable-output /tmp/a.ll
opt: ../../llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:5048: int llvm::slpvectorizer::BoUpSLP::ScheduleCopyableData::incrementUnscheduledDeps(int): Assertion `UnscheduledDeps >= 0 && "invariant"' failed.

mdenson pushed a commit to mdenson/llvm-project that referenced this pull request Aug 16, 2025
This reverts commit ca4ebf9.

Causes compile-time crashes for some inputs with RVV zvl512b/zvl1024b
configurations. See here for a minimal reproducer:
llvm#153393 (comment)
mdenson pushed a commit to mdenson/llvm-project that referenced this pull request Aug 16, 2025
Reported from
llvm#153393 (comment)

During DAGCombine, an intermediate extract_subvector sequence was
generated:
```
  t8: v9i16 = extract_subvector t3, Constant:i64<9>
t24: v8i16 = extract_subvector t8, Constant:i64<0>
```
And one of the DAGCombine rules, which turns `(extract_subvector
(extract_subvector X, C), 0)` into `(extract_subvector X, C)`, kicked in
and turned that into `v8i16 = extract_subvector t3, Constant:i64<9>`. But
it forgot to check if the extracted index is a multiple of the minimum
vector length of the result type, hence the crash.

This patch fixes this by adding an additional check.
@alexey-bataev
Copy link
Member Author

hi, this introduces crashes, even at head after the fixes:

$ cat /tmp/a.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @_RNvMs0_NtCsjIjFnkOzZDy_6object5writeNtB5_6Object4emit() {
.loopexit381:
  %0 = trunc i64 0 to i32
  %1 = select i1 false, i32 %0, i32 0
  %2 = trunc i64 0 to i32
  %3 = select i1 false, i32 %2, i32 0
  %4 = add i32 %2, 0
  %5 = select i1 false, i32 %4, i32 0
  br label %6

6:                                                ; preds = %.loopexit381
  %7 = getelementptr i8, ptr null, i64 12
  store i32 %1, ptr %7, align 4
  %8 = getelementptr i8, ptr null, i64 16
  store i32 %1, ptr %8, align 4
  %9 = getelementptr i8, ptr null, i64 20
  store i32 %3, ptr %9, align 4
  %10 = getelementptr i8, ptr null, i64 24
  store i32 %5, ptr %10, align 4
  ret void
}
$ build/rel/bin/opt -p slp-vectorizer -disable-output /tmp/a.ll
opt: ../../llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:5048: int llvm::slpvectorizer::BoUpSLP::ScheduleCopyableData::incrementUnscheduledDeps(int): Assertion `UnscheduledDeps >= 0 && "invariant"' failed.

Checked the most recent version, cannot reproduce it

@mikaelholmen
Copy link
Collaborator

Hi @alexey-bataev

Another crash here with this patch:
opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null

Result:

opt: ../lib/Transforms/Vectorize/SLPVectorizer.cpp:20853: auto llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *>, BoUpSLP *, const InstructionsState &, const EdgeInfo &)::(anonymous class)::operator()(bool, ScheduleBundle &) const: Assertion `Picked->isReady() && "must be ready to schedule"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null
1.	Running pass "function(slp-vectorizer)" on module "bbi-109672.ll"
2.	Running pass "slp-vectorizer" on function "main"
 #0 0x0000556b1173f286 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x47ee286)
 #1 0x0000556b1173c815 llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x47eb815)
 #2 0x0000556b11740409 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f448d3bb990 __restore_rt (/lib64/libpthread.so.0+0x12990)
 #4 0x00007f448ad5b52f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f448ad2ee65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f448ad2ed39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f448ad53e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000556b1327ae22 llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP*, (anonymous namespace)::InstructionsState const&, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&)::$_1::operator()(bool, llvm::slpvectorizer::BoUpSLP::ScheduleBundle&) const SLPVectorizer.cpp:0:0
 #9 0x0000556b131df805 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628e805)
#10 0x0000556b131dcf36 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bf36)
#11 0x0000556b131dcb46 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bb46)
#12 0x0000556b131fc1ae llvm::slpvectorizer::BoUpSLP::transformNodes() (build-all/bin/opt+0x62ab1ae)
#13 0x0000556b1328b819 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::DataLayout const&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&, llvm::AssumptionCache*) SLPVectorizer.cpp:0:0
#14 0x0000556b1324b367 llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) (build-all/bin/opt+0x62fa367)
#15 0x0000556b1324ca02 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62fba02)
#16 0x0000556b13240f9c llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62eff9c)
#17 0x0000556b1323db86 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (build-all/bin/opt+0x62ecb86)
#18 0x0000556b1323d0f7 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x62ec0f7)
#19 0x0000556b12c0d63d llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#20 0x0000556b119594d5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4a084d5)
#21 0x0000556b12c0977d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#22 0x0000556b1195e08e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a0d08e)
#23 0x0000556b12b98e1d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) NewPMDriver.cpp:0:0
#24 0x0000556b119581c5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a071c5)
#25 0x0000556b12b91c64 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5c40c64)
#26 0x0000556b116e0a78 optMain (build-all/bin/opt+0x478fa78)
#27 0x00007f448ad477e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#28 0x0000556b116de4ae _start (build-all/bin/opt+0x478d4ae)
Abort (core dumped)

I've tested against latest trunk 673750f and it still crashes there.

bbi-109672.ll.gz

@sjoerdmeijer
Copy link
Collaborator

Came here to say the same thing as @mikaelholmen.
I see the same crash with the same backtrace.

@aeubanks
Copy link
Contributor

Checked the most recent version, cannot reproduce it

this seems to have been fixed with 758c685

@mikaelholmen
Copy link
Collaborator

Hi @alexey-bataev

Another crash here with this patch: opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null

Result:

opt: ../lib/Transforms/Vectorize/SLPVectorizer.cpp:20853: auto llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *>, BoUpSLP *, const InstructionsState &, const EdgeInfo &)::(anonymous class)::operator()(bool, ScheduleBundle &) const: Assertion `Picked->isReady() && "must be ready to schedule"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: build-all/bin/opt -passes=slp-vectorizer bbi-109672.ll -S -o /dev/null
1.	Running pass "function(slp-vectorizer)" on module "bbi-109672.ll"
2.	Running pass "slp-vectorizer" on function "main"
 #0 0x0000556b1173f286 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (build-all/bin/opt+0x47ee286)
 #1 0x0000556b1173c815 llvm::sys::RunSignalHandlers() (build-all/bin/opt+0x47eb815)
 #2 0x0000556b11740409 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007f448d3bb990 __restore_rt (/lib64/libpthread.so.0+0x12990)
 #4 0x00007f448ad5b52f raise (/lib64/libc.so.6+0x4e52f)
 #5 0x00007f448ad2ee65 abort (/lib64/libc.so.6+0x21e65)
 #6 0x00007f448ad2ed39 _nl_load_domain.cold.0 (/lib64/libc.so.6+0x21d39)
 #7 0x00007f448ad53e86 (/lib64/libc.so.6+0x46e86)
 #8 0x0000556b1327ae22 llvm::slpvectorizer::BoUpSLP::BlockScheduling::tryScheduleBundle(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP*, (anonymous namespace)::InstructionsState const&, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&)::$_1::operator()(bool, llvm::slpvectorizer::BoUpSLP::ScheduleBundle&) const SLPVectorizer.cpp:0:0
 #9 0x0000556b131df805 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628e805)
#10 0x0000556b131dcf36 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bf36)
#11 0x0000556b131dcb46 llvm::slpvectorizer::BoUpSLP::buildTreeRec(llvm::ArrayRef<llvm::Value*>, unsigned int, llvm::slpvectorizer::BoUpSLP::EdgeInfo const&, unsigned int) (build-all/bin/opt+0x628bb46)
#12 0x0000556b131fc1ae llvm::slpvectorizer::BoUpSLP::transformNodes() (build-all/bin/opt+0x62ab1ae)
#13 0x0000556b1328b819 (anonymous namespace)::HorizontalReduction::tryToReduce(llvm::slpvectorizer::BoUpSLP&, llvm::DataLayout const&, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo const&, llvm::AssumptionCache*) SLPVectorizer.cpp:0:0
#14 0x0000556b1324b367 llvm::SLPVectorizerPass::vectorizeHorReduction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&, llvm::SmallVectorImpl<llvm::WeakTrackingVH>&) (build-all/bin/opt+0x62fa367)
#15 0x0000556b1324ca02 llvm::SLPVectorizerPass::vectorizeRootInstruction(llvm::PHINode*, llvm::Instruction*, llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62fba02)
#16 0x0000556b13240f9c llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (build-all/bin/opt+0x62eff9c)
#17 0x0000556b1323db86 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (build-all/bin/opt+0x62ecb86)
#18 0x0000556b1323d0f7 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x62ec0f7)
#19 0x0000556b12c0d63d llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#20 0x0000556b119594d5 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (build-all/bin/opt+0x4a084d5)
#21 0x0000556b12c0977d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) PassBuilderPipelines.cpp:0:0
#22 0x0000556b1195e08e llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a0d08e)
#23 0x0000556b12b98e1d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) NewPMDriver.cpp:0:0
#24 0x0000556b119581c5 llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (build-all/bin/opt+0x4a071c5)
#25 0x0000556b12b91c64 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (build-all/bin/opt+0x5c40c64)
#26 0x0000556b116e0a78 optMain (build-all/bin/opt+0x478fa78)
#27 0x00007f448ad477e5 __libc_start_main (/lib64/libc.so.6+0x3a7e5)
#28 0x0000556b116de4ae _start (build-all/bin/opt+0x478d4ae)
Abort (core dumped)

I've tested against latest trunk 673750f and it still crashes there.

bbi-109672.ll.gz

Ping @alexey-bataev

This still happens at latest trunk, c6fbd12.

@alexey-bataev
Copy link
Member Author

Hi, sorry for the delay. I don't have access to a computer this week; I will fix it next week.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

9 participants