From 09970932482dd307e8e7fee5ce17a58989294152 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 12 Sep 2023 14:35:22 +0100
Subject: [PATCH 1/2] [RISCV] Reduce LMUL when index is known when lowering
 insert_vector_elt

Continuing on from #65997, if the index of insert_vector_elt is a
constant then we can work out the minimum number of registers that will
be needed for the slideup and choose a smaller type to operate on. This
reduces the LMUL not just for the slideup but also for the scalar
insert.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  25 ++
 .../RISCV/rvv/fixed-vectors-insert-i1.ll      |   2 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll |  14 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  | 292 +++++++++---------
 llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll   |  34 +-
 llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll   |   6 +-
 .../CodeGen/RISCV/rvv/insertelt-int-rv32.ll   |  54 ++--
 .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll   |  46 +--
 .../RISCV/rvv/vsetvli-insert-crossbb.ll       |   5 +-
 9 files changed, 252 insertions(+), 226 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2b8e5aeeb8640..95dc20269fa14 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7458,6 +7458,19 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   }
 
+  MVT OrigContainerVT = ContainerVT;
+  SDValue OrigVec = Vec;
+  // If we know the index we're going to insert at, we can shrink down Vec so
+  // we're performing the scalar insert and slideup on a smaller LMUL.
+  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+    if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
+                                              DL, DAG, Subtarget)) {
+      ContainerVT = *ShrunkVT;
+      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+                        DAG.getVectorIdxConstant(0, DL));
+    }
+  }
+
   MVT XLenVT = Subtarget.getXLenVT();
 
   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
@@ -7482,6 +7495,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
     if (isNullConstant(Idx)) {
       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
+
+      if (ContainerVT != OrigContainerVT)
+        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                          Vec, DAG.getVectorIdxConstant(0, DL));
       if (!VecVT.isFixedLengthVector())
         return Vec;
       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -7514,6 +7531,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     // Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec); + if (ContainerVT != OrigContainerVT) + ValInVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, + ValInVec, DAG.getVectorIdxConstant(0, DL)); if (!VecVT.isFixedLengthVector()) return ValInVec; return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget); @@ -7544,6 +7565,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, Policy = RISCVII::TAIL_AGNOSTIC; SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec, Idx, Mask, InsertVL, Policy); + + if (ContainerVT != OrigContainerVT) + Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec, + Slideup, DAG.getVectorIdxConstant(0, DL)); if (!VecVT.isFixedLengthVector()) return Slideup; return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index b3cbad3d9e6b1..f7737784d4ca5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 373a96356a207..cbcca9d2696f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -40,7 +40,7 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) { ; CHECK-LABEL: insertelt_v32i32_0: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 0 @@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetivli zero, 5, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 4 ; CHECK-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 4 @@ -92,7 +92,7 @@ define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) { ; CHECK-LABEL: insertelt_v64i32_0: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %b = insertelement <64 x i32> %a, i32 %y, i32 0 @@ -390,7 +390,7 @@ define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) { ; CHECK-LABEL: insertelt_v8i64_0: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %b = insertelement <8 x i64> %a, i64 -1, i32 0 @@ -468,7 +468,7 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) { ; CHECK-LABEL: insertelt_c6_v8i64_0: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 6 -; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %b = 
insertelement <8 x i64> %a, i64 6, i32 0 @@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: li a2, 6 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v12, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 49724cbc44182..480e5c2f8f2b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -2424,14 +2424,14 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_2 ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 @@ -2439,7 +2439,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2447,7 +2447,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -2518,7 +2518,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB35_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2531,9 +2531,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -2579,7 +2579,7 @@ define <8 x i32> 
@mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 @@ -2591,9 +2591,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10 @@ -2668,7 +2668,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB36_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2681,9 +2681,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -2729,7 +2729,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6 @@ -2741,9 +2741,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7 ; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10 @@ -2821,7 +2821,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB37_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2835,9 +2835,9 @@ define <8 
x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -2885,7 +2885,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6 @@ -2899,7 +2899,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7 @@ -2980,7 +2980,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB38_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2993,9 +2993,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -3041,7 +3041,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6 @@ -3053,9 +3053,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7 ; RV64ZVE32F-NEXT: 
.LBB38_14: # %cond.load10 @@ -3131,7 +3131,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB39_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -3144,9 +3144,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -3192,7 +3192,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6 @@ -3204,9 +3204,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7 ; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10 @@ -3285,7 +3285,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a3 ; RV64ZVE32F-NEXT: .LBB40_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 @@ -3299,9 +3299,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -3349,7 +3349,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; 
RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_6 @@ -3362,9 +3362,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_7 ; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10 @@ -3438,7 +3438,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -3455,7 +3455,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB41_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma @@ -3499,9 +3499,9 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_6 ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.load7 @@ -3512,7 +3512,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_7 @@ -8227,14 +8227,14 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_2 ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 @@ -8242,7 +8242,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: 
vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -8250,7 +8250,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -8321,7 +8321,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB74_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8334,9 +8334,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8382,7 +8382,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 @@ -8394,9 +8394,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10 @@ -8471,7 +8471,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB75_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8484,9 +8484,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: 
vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8532,7 +8532,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_6 @@ -8544,9 +8544,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_7 ; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10 @@ -8624,7 +8624,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB76_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8638,9 +8638,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8688,7 +8688,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_6 @@ -8702,7 +8702,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_7 @@ -8783,7 +8783,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli 
zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB77_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8796,9 +8796,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8844,7 +8844,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_6 @@ -8856,9 +8856,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_7 ; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10 @@ -8934,7 +8934,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB78_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8947,9 +8947,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8995,7 +8995,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 @@ -9007,9 +9007,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma 
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 ; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10 @@ -9088,7 +9088,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB79_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 @@ -9102,9 +9102,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -9152,7 +9152,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_6 @@ -9165,9 +9165,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_7 ; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10 @@ -9241,7 +9241,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9258,7 +9258,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB80_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma @@ -9302,9 +9302,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; 
RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v14, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_6 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7 @@ -9315,7 +9315,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_7 @@ -12382,7 +12382,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB98_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -12395,7 +12395,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -12417,9 +12417,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 4 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: .LBB98_8: # %else11 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -12433,7 +12433,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 ; RV64ZVE32F-NEXT: .LBB98_10: # %else14 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -12456,9 +12456,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 9 +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9 ; RV64ZVE32F-NEXT: .LBB98_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 @@ -12472,7 +12472,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: 
vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 ; RV64ZVE32F-NEXT: .LBB98_17: # %else29 ; RV64ZVE32F-NEXT: slli a2, a1, 52 @@ -12484,9 +12484,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 51 @@ -12497,9 +12497,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12 ; RV64ZVE32F-NEXT: .LBB98_21: # %else35 ; RV64ZVE32F-NEXT: slli a2, a1, 50 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_23 @@ -12510,9 +12510,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 @@ -12661,7 +12661,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_6 @@ -12672,9 +12672,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_7 ; RV64ZVE32F-NEXT: j .LBB98_8 @@ -12684,7 +12684,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6 ; RV64ZVE32F-NEXT: andi a2, a1, 128 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_12 @@ -12695,9 +12695,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; 
RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7 +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7 ; RV64ZVE32F-NEXT: andi a2, a1, 256 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_13 ; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22 @@ -12707,9 +12707,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 8 +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8 ; RV64ZVE32F-NEXT: andi a2, a1, 512 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_14 ; RV64ZVE32F-NEXT: j .LBB98_15 @@ -12719,7 +12719,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14 ; RV64ZVE32F-NEXT: slli a2, a1, 48 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_25 @@ -12730,9 +12730,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15 ; RV64ZVE32F-NEXT: slli a2, a1, 47 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_26 ; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll index 4bd9f7befa52a..060e99691cb13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -109,7 +109,7 @@ define @insertelt_nxv4f16_idx( %v, half % define @insertelt_nxv8f16_0( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv8f16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 0 @@ -119,7 +119,7 @@ define @insertelt_nxv8f16_0( %v, half %el define @insertelt_nxv8f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv8f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -143,7 +143,7 @@ define @insertelt_nxv8f16_idx( %v, half % define @insertelt_nxv16f16_0( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv16f16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 0 @@ -153,7 +153,7 @@ define @insertelt_nxv16f16_0( %v, half define @insertelt_nxv16f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv16f16_imm: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -177,7 +177,7 @@ define @insertelt_nxv16f16_idx( %v, hal define @insertelt_nxv32f16_0( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv32f16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 0 @@ -187,7 +187,7 @@ define @insertelt_nxv32f16_0( %v, half define @insertelt_nxv32f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv32f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -279,7 +279,7 @@ define @insertelt_nxv2f32_idx( %v, floa define @insertelt_nxv4f32_0( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv4f32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 0 @@ -289,7 +289,7 @@ define @insertelt_nxv4f32_0( %v, float define @insertelt_nxv4f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv4f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -313,7 +313,7 @@ define @insertelt_nxv4f32_idx( %v, floa define @insertelt_nxv8f32_0( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv8f32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 0 @@ -323,7 +323,7 @@ define @insertelt_nxv8f32_0( %v, float define @insertelt_nxv8f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv8f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -347,7 +347,7 @@ define @insertelt_nxv8f32_idx( %v, floa define @insertelt_nxv16f32_0( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv16f32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 0 @@ -357,7 +357,7 @@ define @insertelt_nxv16f32_0( %v, flo define @insertelt_nxv16f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv16f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -415,7 +415,7 @@ define @insertelt_nxv1f64_idx( %v, do define @insertelt_nxv2f64_0( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv2f64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 0 @@ -449,7 +449,7 @@ define @insertelt_nxv2f64_idx( %v, do define @insertelt_nxv4f64_0( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv4f64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, ma +; CHECK-NEXT: 
vsetvli a0, zero, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 0 @@ -459,7 +459,7 @@ define @insertelt_nxv4f64_0( %v, doub define @insertelt_nxv4f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv4f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -483,7 +483,7 @@ define @insertelt_nxv4f64_idx( %v, do define @insertelt_nxv8f64_0( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv8f64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 0 @@ -493,7 +493,7 @@ define @insertelt_nxv8f64_0( %v, doub define @insertelt_nxv8f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv8f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll index a7bd15f2a7b33..911072d9571ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -149,7 +149,7 @@ define @insertelt_nxv16i1( %x, i1 %elt) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetivli zero, 3, e8, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -184,7 +184,7 @@ define @insertelt_nxv32i1( %x, i1 %elt) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetivli zero, 3, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -219,7 +219,7 @@ define @insertelt_nxv64i1( %x, i1 %elt) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetivli zero, 3, e8, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll index 39f94eab2aa66..883906772874e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -141,7 +141,7 @@ define @insertelt_nxv8i8_idx( %v, i8 signext define @insertelt_nxv16i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 @@ -151,7 +151,7 @@ define @insertelt_nxv16i8_0( %v, i8 signext define @insertelt_nxv16i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -175,7 +175,7 @@ define 
@insertelt_nxv16i8_idx( %v, i8 signe define @insertelt_nxv32i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 @@ -185,7 +185,7 @@ define @insertelt_nxv32i8_0( %v, i8 signext define @insertelt_nxv32i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -209,7 +209,7 @@ define @insertelt_nxv32i8_idx( %v, i8 signe define @insertelt_nxv64i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 @@ -219,7 +219,7 @@ define @insertelt_nxv64i8_0( %v, i8 signext define @insertelt_nxv64i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -345,7 +345,7 @@ define @insertelt_nxv4i16_idx( %v, i16 sign define @insertelt_nxv8i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 @@ -355,7 +355,7 @@ define @insertelt_nxv8i16_0( %v, i16 signex define @insertelt_nxv8i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -379,7 +379,7 @@ define @insertelt_nxv8i16_idx( %v, i16 sign define @insertelt_nxv16i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 @@ -389,7 +389,7 @@ define @insertelt_nxv16i16_0( %v, i16 sig define @insertelt_nxv16i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -413,7 +413,7 @@ define @insertelt_nxv16i16_idx( %v, i16 s define @insertelt_nxv32i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 @@ -423,7 +423,7 @@ define @insertelt_nxv32i16_0( %v, i16 sig define @insertelt_nxv32i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -515,7 +515,7 @@ define 
@insertelt_nxv2i32_idx( %v, i32 %elt define @insertelt_nxv4i32_0( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv4i32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 @@ -525,7 +525,7 @@ define @insertelt_nxv4i32_0( %v, i32 %elt) define @insertelt_nxv4i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -549,7 +549,7 @@ define @insertelt_nxv4i32_idx( %v, i32 %elt define @insertelt_nxv8i32_0( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv8i32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 @@ -559,7 +559,7 @@ define @insertelt_nxv8i32_0( %v, i32 %elt) define @insertelt_nxv8i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -583,7 +583,7 @@ define @insertelt_nxv8i32_idx( %v, i32 %elt define @insertelt_nxv16i32_0( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv16i32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 @@ -593,7 +593,7 @@ define @insertelt_nxv16i32_0( %v, i32 %el define @insertelt_nxv16i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -655,7 +655,7 @@ define @insertelt_nxv1i64_idx( %v, i64 %elt define @insertelt_nxv2i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: ret @@ -693,7 +693,7 @@ define @insertelt_nxv2i64_idx( %v, i64 %elt define @insertelt_nxv4i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: ret @@ -704,10 +704,10 @@ define @insertelt_nxv4i64_0( %v, i64 %elt) define @insertelt_nxv4i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslide1down.vx v12, v8, a0 ; CHECK-NEXT: vslide1down.vx v12, v12, a1 -; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 @@ -731,7 +731,7 @@ define @insertelt_nxv4i64_idx( %v, i64 %elt define @insertelt_nxv8i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, tu, ma +; CHECK-NEXT: 
vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: ret @@ -742,10 +742,10 @@ define @insertelt_nxv8i64_0( %v, i64 %elt) define @insertelt_nxv8i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslide1down.vx v16, v8, a0 ; CHECK-NEXT: vslide1down.vx v16, v16, a1 -; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 @@ -771,7 +771,7 @@ define @insertelt_nxv2i64_0_c10( %v) { ; CHECK-LABEL: insertelt_nxv2i64_0_c10: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i64 10, i32 0 @@ -807,7 +807,7 @@ define @insertelt_nxv2i64_0_cn1( %v) { ; CHECK-LABEL: insertelt_nxv2i64_0_cn1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i64 -1, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 1dd00197bbbb0..40767eefdd814 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -141,7 +141,7 @@ define @insertelt_nxv8i8_idx( %v, i8 signext define @insertelt_nxv16i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 @@ -151,7 +151,7 @@ define @insertelt_nxv16i8_0( %v, i8 signext define @insertelt_nxv16i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -175,7 +175,7 @@ define @insertelt_nxv16i8_idx( %v, i8 signe define @insertelt_nxv32i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 @@ -185,7 +185,7 @@ define @insertelt_nxv32i8_0( %v, i8 signext define @insertelt_nxv32i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -209,7 +209,7 @@ define @insertelt_nxv32i8_idx( %v, i8 signe define @insertelt_nxv64i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 @@ -219,7 +219,7 @@ define @insertelt_nxv64i8_0( %v, i8 signext define @insertelt_nxv64i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli 
zero, 4, e8, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -345,7 +345,7 @@ define @insertelt_nxv4i16_idx( %v, i16 sign define @insertelt_nxv8i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 @@ -355,7 +355,7 @@ define @insertelt_nxv8i16_0( %v, i16 signex define @insertelt_nxv8i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -379,7 +379,7 @@ define @insertelt_nxv8i16_idx( %v, i16 sign define @insertelt_nxv16i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 @@ -389,7 +389,7 @@ define @insertelt_nxv16i16_0( %v, i16 sig define @insertelt_nxv16i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -413,7 +413,7 @@ define @insertelt_nxv16i16_idx( %v, i16 s define @insertelt_nxv32i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 @@ -423,7 +423,7 @@ define @insertelt_nxv32i16_0( %v, i16 sig define @insertelt_nxv32i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -515,7 +515,7 @@ define @insertelt_nxv2i32_idx( %v, i32 sign define @insertelt_nxv4i32_0( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 @@ -525,7 +525,7 @@ define @insertelt_nxv4i32_0( %v, i32 signex define @insertelt_nxv4i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -549,7 +549,7 @@ define @insertelt_nxv4i32_idx( %v, i32 sign define @insertelt_nxv8i32_0( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 @@ -559,7 +559,7 @@ define @insertelt_nxv8i32_0( %v, i32 signex define @insertelt_nxv8i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -583,7 +583,7 @@ define @insertelt_nxv8i32_idx( %v, i32 sign define @insertelt_nxv16i32_0( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i32_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 @@ -593,7 +593,7 @@ define @insertelt_nxv16i32_0( %v, i32 sig define @insertelt_nxv16i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -653,7 +653,7 @@ define @insertelt_nxv1i64_idx( %v, i64 %elt define @insertelt_nxv2i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 @@ -689,7 +689,7 @@ define @insertelt_nxv2i64_idx( %v, i64 %elt define @insertelt_nxv4i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m4, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 @@ -699,7 +699,7 @@ define @insertelt_nxv4i64_0( %v, i64 %elt) define @insertelt_nxv4i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -725,7 +725,7 @@ define @insertelt_nxv4i64_idx( %v, i64 %elt define @insertelt_nxv8i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_0: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, tu, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 @@ -735,7 +735,7 @@ define @insertelt_nxv8i64_0( %v, i64 %elt) define @insertelt_nxv8i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 73f651225da64..44a396ee29a8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -810,9 +810,10 @@ for.end: ; preds = %for.body define @cross_block_mutate( %a, %b, ; CHECK-LABEL: cross_block_mutate: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetivli a0, 6, e32, m2, tu, ma +; CHECK-NEXT: vsetivli a0, 6, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %mask) { From 09d1751c450680b5f7cc856e6e77386db7142682 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 12 Sep 2023 14:47:52 +0100 Subject: [PATCH 2/2] 
Update comment

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 95dc20269fa14..7ec26983ebde4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7460,8 +7460,8 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
 
   MVT OrigContainerVT = ContainerVT;
   SDValue OrigVec = Vec;
-  // If we know the index we're going to insert at, we can shrink down Vec so
-  // we're performing the vslide1down on a smaller LMUL.
+  // If we know the index we're going to insert at, we can shrink Vec so that
+  // we're performing the scalar inserts and slideup on a smaller LMUL.
   if (auto *CIdx = dyn_cast(Idx)) {
     if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
                                               DL, DAG, Subtarget)) {
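
Illustration only: the effect of the series can be seen in a single standalone test written in the style of the files updated above. The scalable vector types below are reconstructed from the mangled test name and the RUN line is just a plausible llc invocation, so treat both as assumptions rather than copies of the checked-in test; the CHECK lines restate the post-patch codegen from the insertelt-int-rv64.ll hunk, where an insert at constant index 3 into a <vscale x 16 x i8> vector now runs at m1 instead of m2 for both the scalar insert and the slideup.

; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

define <vscale x 16 x i8> @insertelt_nxv16i8_imm(<vscale x 16 x i8> %v, i8 signext %elt) {
; CHECK-LABEL: insertelt_nxv16i8_imm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    ret
  ; Only vl = index + 1 = 4 elements need to be touched to insert at index 3,
  ; so the lowering can operate at LMUL m1 even though the full <vscale x 16 x i8>
  ; type would otherwise require an m2 operation.
  %r = insertelement <vscale x 16 x i8> %v, i8 %elt, i32 3
  ret <vscale x 16 x i8> %r
}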