
Conversation

Contributor

@Mel-Chen Mel-Chen commented Jul 1, 2025

This patch introduces VPInstruction::Reverse and extracts the reverse operations of loaded/stored values from reverse memory accesses. This extraction facilitates future support for permutation elimination within VPlan.
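
For illustration, the intended VPlan-level change (mirroring the documentation this patch adds to VPlanTransforms.h) is:

    For a reverse load, transform
      WIDEN ir<%L> = load vp<%addr>
    into
      WIDEN ir<%L> = load vp<%addr>
      EMIT  vp<%RevL> = reverse ir<%L>

    For a reverse store, transform
      WIDEN store vp<%addr>, ir<%SVal>
    into
      EMIT  vp<%RevS> = reverse ir<%SVal>
      WIDEN store vp<%addr>, vp<%RevS>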

Member

llvmbot commented Jul 1, 2025

@llvm/pr-subscribers-backend-powerpc
@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Mel Chen (Mel-Chen)

Changes

This patch introduces VPInstruction::Reverse and extracts the reverse operations of loaded/stored values from reverse memory accesses. This extraction facilitates future support for permutation elimination within VPlan.


Patch is 69.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146525.diff

18 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+6)
  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+4)
  • (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+2)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+1)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+18-29)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+39)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+14)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll (+4-4)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+28-16)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll (+50-50)
  • (modified) llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll (+1-1)
  • (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+3-3)
  • (modified) llvm/test/Transforms/LoopVectorize/reverse_induction.ll (+5-5)
  • (modified) llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll (+12-12)
  • (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+2-1)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 67a51c12b508e..d5aeb4feb19ba 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1541,6 +1541,12 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                           cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
                           0, cast<VectorType>(ICA.getReturnType()));
   }
+  case Intrinsic::experimental_vp_reverse: {
+    return getShuffleCost(TTI::SK_Reverse,
+                          cast<VectorType>(ICA.getReturnType()),
+                          cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
+                          0, cast<VectorType>(ICA.getReturnType()));
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b01c8b02ec66a..94782c33f5bda 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8880,6 +8880,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
   // bring the VPlan to its final state.
   // ---------------------------------------------------------------------------
 
+  // Adjust the result of reverse memory accesses.
+  VPlanTransforms::runPass(VPlanTransforms::adjustRecipesForReverseAccesses,
+                           *Plan);
+
   // Adjust the recipes for any inloop reductions.
   adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 61b5ccd85bc6e..55175a889d0e0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -970,6 +970,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
     // It produces the lane index across all unrolled iterations. Unrolling will
     // add all copies of its original operand as additional operands.
     FirstActiveLane,
+    // Returns a reversed vector for the operand.
+    Reverse,
 
     // The opcodes below are used for VPInstructionWithType.
     //
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index f3b99fe34c069..f87b6de42c8b8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -126,6 +126,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
     return IntegerType::get(Ctx, 1);
   case VPInstruction::Broadcast:
   case VPInstruction::PtrAdd:
+  case VPInstruction::Reverse:
     // Return the type based on first operand.
     return inferScalarType(R->getOperand(0));
   case VPInstruction::BranchOnCond:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1a38932ef99fe..b4ed4ef3147c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -444,6 +444,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
   case VPInstruction::ExtractPenultimateElement:
   case VPInstruction::FirstActiveLane:
   case VPInstruction::Not:
+  case VPInstruction::Reverse:
     return 1;
   case Instruction::ICmp:
   case Instruction::FCmp:
@@ -873,6 +874,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
 
     return Res;
   }
+  case VPInstruction::Reverse: {
+    return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse");
+  }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -948,6 +952,13 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
                                   I32Ty, {Arg0Ty, I32Ty, I1Ty});
     return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
   }
+  case VPInstruction::Reverse: {
+    assert(VF.isVector() && "Reverse operation must be vector type");
+    Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
+    return Ctx.TTI.getShuffleCost(
+        TargetTransformInfo::SK_Reverse, cast<VectorType>(VectorTy),
+        cast<VectorType>(VectorTy), {}, Ctx.CostKind, 0);
+  }
   case VPInstruction::ExtractPenultimateElement:
     if (VF == ElementCount::getScalable(1))
       return InstructionCost::getInvalid();
@@ -1033,6 +1044,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
   case VPInstruction::WideIVStep:
   case VPInstruction::StepVector:
   case VPInstruction::ReductionStartVector:
+  case VPInstruction::Reverse:
     return false;
   default:
     return true;
@@ -1179,6 +1191,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ReductionStartVector:
     O << "reduction-start-vector";
     break;
+  case VPInstruction::Reverse:
+    O << "reverse";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
@@ -2967,12 +2982,7 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
                                     OpInfo, &Ingredient);
   }
-  if (!Reverse)
-    return Cost;
-
-  return Cost += Ctx.TTI.getShuffleCost(
-             TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
-             cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
+  return Cost;
 }
 
 void VPWidenLoadRecipe::execute(VPTransformState &State) {
@@ -3004,8 +3014,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
   }
   applyMetadata(*cast<Instruction>(NewLI));
-  if (Reverse)
-    NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
   State.set(this, NewLI);
 }
 
@@ -3061,8 +3069,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   applyMetadata(*NewLI);
   Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
   State.set(this, Res);
 }
 
@@ -3083,12 +3089,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
   InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
       Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
-  if (!Reverse)
-    return Cost;
 
-  return Cost + Ctx.TTI.getShuffleCost(
-                    TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
-                    cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
+  return Cost;
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3118,13 +3120,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   }
 
   Value *StoredVal = State.get(StoredVPValue);
-  if (isReverse()) {
-    // If we store to reverse consecutive memory locations, then we need
-    // to reverse the order of elements in the stored value.
-    StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-    // We don't want to update the value in the map as it might be used in
-    // another expression. So don't call resetVectorValue(StoredVal).
-  }
   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
   Instruction *NewSI = nullptr;
   if (CreateScatter)
@@ -3154,8 +3149,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
   if (VPValue *VPMask = getMask()) {
     Mask = State.get(VPMask);
@@ -3196,12 +3189,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
   InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
       Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
-  if (!Reverse)
-    return Cost;
 
-  return Cost + Ctx.TTI.getShuffleCost(
-                    TargetTransformInfo::SK_Reverse, cast<VectorType>(Ty),
-                    cast<VectorType>(Ty), {}, Ctx.CostKind, 0);
+  return Cost;
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 730deb0686b2a..cf41b6d00f285 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2172,6 +2172,14 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
                                             VPI->getDebugLoc());
         }
 
+        if (VPI->getOpcode() == VPInstruction::Reverse) {
+          SmallVector<VPValue *> Ops(VPI->operands());
+          Ops.append({&AllOneMask, &EVL});
+          return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_reverse,
+                                            Ops, TypeInfo.inferScalarType(VPI),
+                                            VPI->getDebugLoc());
+        }
+
         VPValue *LHS, *RHS;
         // Transform select with a header mask condition
         //   select(header_mask, LHS, RHS)
@@ -3347,3 +3355,34 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
       MDB.createBranchWeights({1, VectorStep - 1}, /*IsExpected=*/false);
   MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
 }
+
+void VPlanTransforms::adjustRecipesForReverseAccesses(VPlan &Plan) {
+  if (Plan.hasScalarVFOnly())
+    return;
+
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
+    for (VPRecipeBase &R : *VPBB) {
+      auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R);
+      if (!MemR || !MemR->isReverse())
+        continue;
+
+      if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
+        auto *Reverse =
+            new VPInstruction(VPInstruction::Reverse, {L}, L->getDebugLoc());
+        Reverse->insertAfter(L);
+        L->replaceAllUsesWith(Reverse);
+        Reverse->setOperand(0, L);
+        continue;
+      }
+
+      if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
+        VPValue *StoredVal = S->getStoredValue();
+        auto *Reverse = new VPInstruction(VPInstruction::Reverse, {StoredVal},
+                                          S->getDebugLoc());
+        Reverse->insertBefore(S);
+        S->setOperand(1, Reverse);
+      }
+    }
+  }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 40885cd52a127..abe592247e2de 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -239,6 +239,20 @@ struct VPlanTransforms {
   /// Add branch weight metadata, if the \p Plan's middle block is terminated by
   /// a BranchOnCond recipe.
   static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF);
+
+  /// Add reverse recipes for reverse memory accesses.
+  /// For reverse loads, transform
+  ///   WIDEN ir<%L> = load vp<%addr>
+  /// into
+  ///   WIDEN ir<%L> = load vp<%addr>
+  ///   EMIT   vp<%RevL> = reverse ir<%L>
+  ///
+  /// For reverse stores, transform
+  ///   WIDEN store vp<%addr>, ir<%SVal>
+  /// into
+  ///   EMIT   vp<%RevS> = reverse ir<%SVal>
+  ///   WIDEN  store vp<%addr>, vp<%RevS>
+  static void adjustRecipesForReverseAccesses(VPlan &Plan);
 };
 
 } // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
index 9485d827ced40..c838c63545341 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll
@@ -22,8 +22,8 @@ define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 {
 ; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
 ; CHECK: %[[REVERSE7:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[WIDEMSKLOAD]])
 ; CHECK: %[[FADD:.*]] = fadd <vscale x 4 x double> %[[REVERSE7]]
-; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: %[[REVERSE8:.*]] = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> %[[FADD]])
+; CHECK: %[[REVERSE9:.*]] = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
 ; CHECK: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> %[[REVERSE8]], ptr %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE9]]
 
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
index 1dd49ecf85b81..d6f619cce54a0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -37,8 +37,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -24
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -56
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll
index 09b274de30214..6d55f7369f01e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll
@@ -165,8 +165,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP15]]
 ; RV64-UF2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]]
 ; RV64-UF2-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4
-; RV64-UF2-NEXT:    [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
 ; RV64-UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP18]], align 4
+; RV64-UF2-NEXT:    [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
 ; RV64-UF2-NEXT:    [[REVERSE2:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD1]])
 ; RV64-UF2-NEXT:    [[TMP19:%.*]] = add <vscale x 4 x i32> [[REVERSE]], splat (i32 1)
 ; RV64-UF2-NEXT:    [[TMP20:%.*]] = add <vscale x 4 x i32> [[REVERSE2]], splat (i32 1)
@@ -180,8 +180,8 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP26]]
 ; RV64-UF2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP27]]
 ; RV64-UF2-NEXT:    [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP19]])
-; RV64-UF2-NEXT:    store <vscale x 4 x i32> [[REVERSE3]], ptr [[TMP25]], align 4
 ; RV64-UF2-NEXT:    [[REVERSE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP20]])
+; RV64-UF2-NEXT:    store <vscale x 4 x i32> [[REVERSE3]], ptr [[TMP25]], align 4
 ; RV64-UF2-NEXT:    store <vscale x 4 x i32> [[REVERSE4]], ptr [[TMP29]], align 4
 ; RV64-UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; RV64-UF2-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -371,8 +371,8 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP15]]
 ; RV64-UF2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP16]]
 ; RV64-UF2-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
-; RV64-UF2-NEXT:    [[REVERSE:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]])
 ; RV64-UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP18]], align 4
+; RV64-UF2-NEXT:    [[REVERSE:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]])
 ; RV64-UF2-NEXT:    [[REVERSE2:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD1]])
 ; RV64-UF2-NEXT:    [[TMP19:%.*]] = fadd <vscale x 4 x float> [[REVERSE]], splat (float 1.000000e+00)
 ; RV64-UF2-NEXT:    [[TMP20:%.*]] = fadd <vscale x 4 x float> [[REVERSE2]], splat (float 1.000000e+00)
@@ -386,8 +386,8 @@ define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) {
 ; RV64-UF2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP26]]
 ; RV64-UF2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP27]]
 ; RV64-UF2-NEXT:    [[REVERSE3:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP19]])
-; RV64-UF2-NEXT:    store <vscale x 4 x float> [[REVERSE3]], ptr [[TMP25]], align 4
 ; RV64-UF2-NEXT:    [[REVERSE4:%.*]] = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[TMP20]])
+; RV64-UF2-NEXT:    store <vscale x 4 x float> [[REVERSE3]], ptr [[TMP25]], align 4
 ; RV64-UF2-NEXT:    store <vscale x 4 x float> [[REVERSE4]], ptr [[TMP29]], align 4
 ; RV64-UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; RV64-UF2-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index dd8b7d6ea7e42..6d49a7fc16ad5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -105,10 +105,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
 ; CHECK-NEXT:      vp<%9> = vector-end-pointer inbounds ir<%arrayidx>, vp<%0>
 ; CHECK-NEXT:      WIDEN ir<%1> = load vp<%9>
-; CHECK-NEXT:      WIDEN ir<%add9> = add ir<%1>, ir<1>
+; CHECK-NEXT:      EMIT vp<%10> = reverse ir<%1>
+; CHECK-NEXT:      WIDEN ir<%add9> = add vp<%10>, ir<1>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<%10> = vector-end-pointer inbounds ir<%arrayidx3>, vp<%0>
-; CHECK-NEXT:      WIDEN store vp<%10>, ir<%add9>
+; CHECK-NEXT:      vp<%11> = vector-end-pointer inbounds ir<%arrayidx3>, vp<%0>
+; CHECK-NEXT:      EMIT vp<%12> = reverse ir<%add9>
+; CHECK-NEXT:      WIDEN store vp<%11>, vp<%12>
 ; CHECK-NEXT:      EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
 ; CHECK-NEXT:      EMIT branch-on-count vp<%index.next>, vp<%2>
 ; CHECK-NEXT:    No successors
@@ -167,8 +169,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV(REG): At #9 Interval # 3
 ; CHECK-NEXT:  LV(REG): At #10 Interval # 3
 ; CHECK-NEXT:  LV(REG): At #11 Interval # 3
-; CHECK-NEXT:  LV(REG): At #12 Interval # 2
-; CHECK-NEXT:  LV(REG): At #13 Interval # 2
+; CHECK-NEXT:  LV(REG): At #12 Interval # 3
+; CHECK-NEXT:  LV(REG): At #13 Interval # 3
+; CHECK-NEXT:  LV(REG): At #14 Interval # 2
+; CHECK-NEXT:  LV(REG): At #15 Interval # 2
 ; CHECK-NEXT:  LV(REG): VF = vscale x 4
 ; CHECK-NEXT:  LV(REG): Found max...
[truncated]

Contributor

@lukel97 lukel97 left a comment

+1 on splitting this out; I think this works well with the direction of splitting up big recipes into smaller ones. Just an idea about possibly inserting the reverses in tryToWiden, but otherwise generally LGTM.

Contributor

This is a cool optimisation, I guess the LICM transform pulls the VPInstruction::Reverse out of the loop body so convertToEVLRecipes doesn't see it?

Contributor Author

Yes. But I think it's fine that this isn't converted into vp.reverse here, since the operand of a reverse that can be hoisted by LICM should be uniform. We could even remove the reverse operation entirely in this case in the future.
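
For example, a hypothetical input loop like the one below (not taken from this PR's tests) would hit that case: the stored value is loop-invariant, so the reverse of its broadcast is uniform, LICM can hoist it out of the vector loop, and it could eventually be dropped entirely because reversing a splat is a no-op.

    // Hypothetical example: a reverse (stride -1) store of a loop-invariant
    // value. The vectorizer broadcasts X, and the reverse of that broadcast
    // is itself loop-invariant, so LICM can hoist the reverse out of the loop.
    void fill_backwards(int *A, int X, long N) {
      for (long I = N - 1; I >= 0; --I)
        A[I] = X;
    }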

Contributor

I just did a quick scan and it looks like the only place where Reverse is set is in VPRecipeBuilder::tryToWidenMemory. Instead of introducing another transform, should we just insert the VPInstruction::Reverses there to avoid having to iterate over all the recipes again?

This way would mean we could also remove the Reverse field from VPWidenMemoryRecipe

Contributor Author

I was initially worried this might affect interleaved accesses, but I was overthinking it. So far, generating the reverse directly in tryToWidenMemory seems fine.
9d6136b7a21a91fe5c479b9071c113b7802f062f

This way would mean we could also remove the Reverse field from VPWidenMemoryRecipe

We can't remove the Reverse field from VPWidenMemoryRecipe: we still need the reversed mask if it's a reverse access. I also don't plan to separate out the reverse mask, as I don't think it would bring much benefit.

Contributor

Can we also just reverse the mask at construction?

Contributor Author

That's possible, but the reason I'm not doing it for now is that, in case some reverse operations can't be eliminated, we might want to convert reverse accesses into strided accesses with a stride of -1. Keeping the Reverse field could make it easier to identify the target recipes that need conversion. Also, reversed masks generally can't benefit from permutation elimination, I think, which is why I haven't done it this way.

Contributor

What I guess I would eventually like to see is that our optimisations to remove the header masks just become plain old peepholes, written pattern-match style like in simplifyRecipes.

E.g. for a regular load we would try and match:

(load ptr, header-mask) -> (vp.load ptr, all-true, evl)

And if we were to split out the reverses for both the data and mask into separate recipes, and add a VF operand like what we currently do for the end pointer, we would also have:

(reverse (load (end-ptr p, vf), (reverse header-mask, vf)), vf)
   ->
(reverse (vp.load (end-ptr p, evl), all-true, evl), evl)
OR
(vp.strided-load p, all-true, stride=-1, evl)

I think these patterns seem simple enough, and we could probably write them with the VPlanPatternMatch. Most importantly, these transformations don't change the semantics and are just optimisations. So if somehow we miss one of these transforms it will still be correct.

For permutation elimination we would just need to have:

(reverse (reverse x N) N) -> x
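
For what it's worth, a minimal sketch of how that last fold might look with VPlanPatternMatch, assuming it runs in a simplification pass over each recipe R (hypothetical code, not part of this patch):

    // Hypothetical peephole: fold reverse(reverse(X)) -> X, where R is the
    // recipe currently being simplified.
    using namespace VPlanPatternMatch;
    VPValue *X;
    if (match(&R, m_VPInstruction<VPInstruction::Reverse>(
                      m_VPInstruction<VPInstruction::Reverse>(m_VPValue(X))))) {
      R.getVPSingleValue()->replaceAllUsesWith(X);
      return;
    }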

Contributor Author

Reverse access can indeed be pattern-matched.
1392a872f0b69310340088d6323f1ae3735838c6
I've separated out the reverse mask, but it is not as easy as we thought. :(
Besides affecting the cost model (the cost of the reversed mask is currently not computed), it also requires extra handling for the reverse mask during EVL lowering.

Contributor

Now that the load/store doesn't handle reversing, it should not need the flag to indicate it is reversing

Contributor Author

There was a similar discussion earlier: #146525 (comment)
I think it would be good to continue the discussion in the same comment thread.

Contributor

The code structure in the function seems inconsistent; converting FirstOrderRecurrenceSplice is handled inline, while Reverse is handled in a separate function. Can we merge the loops, since we now need to unconditionally iterate over all recipes in the loop region anyway?

Contributor Author

Same, redirect to #146525 (comment).

Contributor Author

@lukel97 @fhahn
7c2493d
The conversion to vp.reverse now uses the new approach. It is no longer necessary to visit every recipe.


github-actions bot commented Aug 14, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Contributor

Is this now no longer vectorized?

Contributor Author

Yes, this is also caused by separating the reverse mask from the reverse access recipes.
The change has been moved to #155579, and I will investigate it there.

Contributor Author

Mel-Chen commented Aug 27, 2025

#155579
Since separating the reverse mask also involves the cost model and requires more implementation work during EVL lowering, I decided to split these two changes to reduce the complexity of this PR.

Contributor

@artagnon artagnon left a comment

Seems like a good, positive direction.

Comment on lines 2246 to 2293
auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
  if (R->hasMoreThanOneUniqueUser() || R->getNumUsers() == 0)
    return nullptr;
  return dyn_cast<VPRecipeBase>(*R->user_begin());
};
Contributor

Would it be good to introduce getUniqueUndroppableUser() or hasOneUse(), similar to the ones in IR? I think one of @lukel97's patches could also use it.

Contributor

I don't think we need to check for zero users in my linked PR, since we're checking the uses of an operand. So it will always have at least one use

Contributor Author

Although it might not be related, this comment made me realize that swapping the two conditions might be better.
9546b23

Comment on lines 2364 to 2630
if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(EVLRecipe);
    MemR && match(MemR->getAddr(),
                  m_VectorEndPointer(m_VPValue(), m_Specific(&EVL)))) {
Contributor

Suggested change
if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(EVLRecipe);
    MemR && match(MemR->getAddr(),
                  m_VectorEndPointer(m_VPValue(), m_Specific(&EVL)))) {
if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(EVLRecipe))
  if (match(MemR->getAddr(),
            m_VectorEndPointer(m_VPValue(), m_Specific(&EVL)))) {

Contributor Author

Why?

Contributor

I think we generally avoid this pattern as it is uncommon and it doesn't help much with readability.

Contributor Author

Ok, we can bail out early instead.
fe5532a

Comment on lines 2260 to 2304
else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
         Opcode == Instruction::FPExt) {
Contributor

Not sure I understand why the code is guarded by Trunc, FPTrunc, FPExt, ZExt, or SExt opcodes?

Contributor Author

This code is used to determine TTI::CastContextHint. Perhaps for other cast opcodes, there isn’t currently a situation that requires this hint. But I think that’s a separate issue. The change here is just to ensure that CastContextHint::Reversed is still correctly propagated after the reverse operation is separated out.

Comment on lines 2371 to 2636
assert(LoadR->getNumUsers() == 1 &&
       "Unexpected user number of reverse load");
Contributor

Not sure why the load/store cases are asymmetric? Maybe add a comment about why this assert should hold?

Contributor

For a load we need to reverse the result, and for a store we need to reverse the operand being stored

Contributor Author

Maybe add a comment about why this assert should hold?

    if (auto *Load = dyn_cast<LoadInst>(I)) {
      auto *LoadR =
          new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
                                VPIRMetadata(*Load, LVer), Load->getDebugLoc());
      if (Reverse) {
        Builder.insert(LoadR);
        return new VPInstruction(VPInstruction::Reverse, LoadR,
                                 LoadR->getDebugLoc());
      }
      return LoadR;
    }

Because there must be a reverse operation after the load if it is a reverse load.
In the future, the assertion should be removed once the vectorizer supports permutation elimination in SimplifyRecipe.

Comment on lines 2362 to 2627
// TODO: Extend conversion along the def-use/use-def chain, as reverse
// operations may be eliminated or moved in the future.
Contributor

Hm, seems like we may be leaving a footgun here, although I'm not sure how it can be improved?

Contributor Author

This is because in the future, a reverse might be eliminated or moved by SimplifyRecipe.
Implementing this wouldn't be too difficult; for a reverse load, for example, we could collect users recursively with collectUsersRecursively and transform the reverse accordingly. However, this situation doesn't currently occur, so there's no example to verify correctness, which is why I haven't done it yet.
Do I need to tackle this TODO now?


lukel97 added a commit to lukel97/llvm-project that referenced this pull request Sep 1, 2025
Stacked on llvm#155383

Currently in optimizeMaskToEVL we convert every widened load, store or reduction to a VP predicated recipe with EVL, regardless of whether or not it uses the header mask.

So currently we have to be careful when working on other parts of VPlan to make sure that the EVL transform doesn't break or transform something incorrectly, because it's not a semantics-preserving transform. Forgetting to do so has caused miscompiles before, like the case that was fixed in llvm#113667

This PR rewrites it to work in terms of pattern matching, so it now only converts a recipe to a VP predicated recipe if it uses the header mask.

It also splits out the load/store transforms into separate patterns for reversed and non-reversed, which should make llvm#146525 easier to implement and reason about.

After this the transform should be a true optimisation and not change any semantics, so it shouldn't miscompile things if other parts of VPlan change.

This fixes llvm#152541, and allows us to move addExplicitVectorLength into tryToBuildVPlanWithVPRecipes in llvm#153144
@Mel-Chen Mel-Chen force-pushed the reverse branch 3 times, most recently from fe5532a to ef2d027 on September 11, 2025 11:17
Comment on lines 1155 to 1158
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getShuffleCost(
    TargetTransformInfo::SK_Reverse, cast<VectorType>(VectorTy),
    cast<VectorType>(VectorTy), {}, Ctx.CostKind, 0);
Contributor

Suggested change
Type *VectorTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getShuffleCost(
    TargetTransformInfo::SK_Reverse, cast<VectorType>(VectorTy),
    cast<VectorType>(VectorTy), {}, Ctx.CostKind, 0);
Type *VectorTy =
    cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
return Ctx.TTI.getShuffleCost(
    TargetTransformInfo::SK_Reverse, VectorTy,
    VectorTy, {}, Ctx.CostKind, 0);

Could you also add `/*Arg=*/` to the arguments passing `{}` and `0`?

Contributor Author

Sure.
5d402e2

Comment on lines +2309 to +2299
if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
  if (match(Recipe, m_VPInstruction<VPInstruction::Reverse>(m_VPValue())))
    Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));
  if (Recipe)
    CCH = ComputeCCH(Recipe);
Contributor

Hmm, if we have a shuffle in between the load and a cast for example, can the cast still be folded into the load in most cases? Curious if this may have surfaced an inaccuracy in the current cost modeling.

Contributor Author

RISC-V currently doesn’t have memory access instructions like this; AArch64 seems to have some. But I’m not sure if AArch64 supports combining load + shuffle + cast into a single instruction (I haven’t found one so far). Also, the current AArch64 TTI doesn’t use CastContextHint::Reversed for its decisions. I’m thinking that if load + reverse + cast can’t be combined into a load, maybe we could just remove CastContextHint::Reversed and return CastContextHint::None instead. What do you think?
cc. @david-arm

Comment on lines +2490 to +2493
// Skip if the stored value is not defined in the loop region.
if (!StoredVal->isDefinedOutsideLoopRegions()) {
Contributor

Hmm, is this correct even if the value outside the region is a vector other than a broadcast?

Contributor Author

I used to worry about the same issue. Since vp.reverse requires EVL as its operand, a reverse defined in the preheader cannot be converted into vp.reverse unless we sink the reverse back into the vectorized loop. But I can't imagine a situation where, inside the vectorized loop, we would store a vector defined in the preheader with different values in each lane.

Do you think we should be conservative and first sink the reverse defined in the preheader back into the vectorized loop body and convert it into vp.reverse, or is it reasonable to add an assertion here to ensure that all lanes of the stored value are identical?

// Convert general reverse operations on loaded result into vp.reverse, when
// the VPVectorEndPointerRecipe adjusting the access address uses EVL
// instead of VF.
if (auto *LoadR = dyn_cast<VPWidenLoadEVLRecipe>(EVLRecipe)) {
Contributor

Is there a reason we handle the load/store cases separately, instead of just converting all reverse operations? Could we miscompile in the future if some other transform decides to create new reverse operations?

Contributor Author

Is there a reason we handle the load/store cases separately, instead of just converting all reverse operations?

This is the conclusion @lukel97 and I reached: we should convert only the recipes that are actually masked by the header mask, rather than converting all recipes into EVL recipes.
For now, converting all reverses in the vectorized loop into vp.reverse has the same effect as converting along the DU/UD chain from VPWidenLoadEVLRecipe/VPWidenStoreEVLRecipe, because this is the only usage scenario currently.

Could we mis-compile in the future if some other transform decides to create new reverse operations?

The conversion is based on the fact that masked reverse accesses use reverse(header mask) or reverse(header mask and mask) as the mask. A reversed header mask causes non-active lanes to go to the head, while VP intrinsics with EVL can only mask out non-active lanes at the tail, not at the head. That’s why we need a series of transformations: VPVectorEndPointer(ptr, VF) to VPVectorEndPointer(ptr, EVL) and reverse to vp.reverse.
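
A small worked illustration (hypothetical lane values, VF = 4 with 3 elements remaining):

    header mask            = <1, 1, 1, 0>   ; inactive lane at the tail
    reverse(header mask)   = <0, 1, 1, 1>   ; inactive lane is now at the head
    EVL = 3                                 ; can only disable trailing lanes, i.e. <1, 1, 1, 0>

So a reversed header mask cannot be expressed with EVL alone, which is why the address is rewritten to use VPVectorEndPointer with EVL and the reverse is converted to vp.reverse, so that the active elements line up with the EVL-masked tail.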

If a new recipe is also masked by reverse(header mask) or reverse(header mask and mask), then converting the new recipe into an EVL recipe also requires converting the reverse into vp.reverse. But if it isn’t masked, there’s no need to convert it into vp.reverse.
