diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 0a42c6faee290..95cea0c61acfd 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -130,6 +130,118 @@ parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {
 
 } // namespace RISCVFeatures
 
+bool RISCVII::vectorInstUsesNBitsOfScalarOp(uint16_t Opcode, unsigned Bits,
+                                            unsigned Log2SEW) {
+  // TODO: Handle Zvbb instructions
+  switch (Opcode) {
+  default:
+    return false;
+
+  // 11.6. Vector Single-Width Shift Instructions
+  case RISCV::VSLL_VX:
+  case RISCV::VSRL_VX:
+  case RISCV::VSRA_VX:
+  // 12.4. Vector Single-Width Scaling Shift Instructions
+  case RISCV::VSSRL_VX:
+  case RISCV::VSSRA_VX:
+    // Only the low lg2(SEW) bits of the shift-amount value are used.
+    return Log2SEW <= Bits;
+
+  // 11.7 Vector Narrowing Integer Right Shift Instructions
+  case RISCV::VNSRL_WX:
+  case RISCV::VNSRA_WX:
+  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+  case RISCV::VNCLIPU_WX:
+  case RISCV::VNCLIP_WX:
+    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
+    return (Log2SEW + 1) <= Bits;
+
+  // 11.1. Vector Single-Width Integer Add and Subtract
+  case RISCV::VADD_VX:
+  case RISCV::VSUB_VX:
+  case RISCV::VRSUB_VX:
+  // 11.2. Vector Widening Integer Add/Subtract
+  case RISCV::VWADDU_VX:
+  case RISCV::VWSUBU_VX:
+  case RISCV::VWADD_VX:
+  case RISCV::VWSUB_VX:
+  case RISCV::VWADDU_WX:
+  case RISCV::VWSUBU_WX:
+  case RISCV::VWADD_WX:
+  case RISCV::VWSUB_WX:
+  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+  case RISCV::VADC_VXM:
+  case RISCV::VADC_VIM:
+  case RISCV::VMADC_VXM:
+  case RISCV::VMADC_VIM:
+  case RISCV::VMADC_VX:
+  case RISCV::VSBC_VXM:
+  case RISCV::VMSBC_VXM:
+  case RISCV::VMSBC_VX:
+  // 11.5 Vector Bitwise Logical Instructions
+  case RISCV::VAND_VX:
+  case RISCV::VOR_VX:
+  case RISCV::VXOR_VX:
+  // 11.8. Vector Integer Compare Instructions
+  case RISCV::VMSEQ_VX:
+  case RISCV::VMSNE_VX:
+  case RISCV::VMSLTU_VX:
+  case RISCV::VMSLT_VX:
+  case RISCV::VMSLEU_VX:
+  case RISCV::VMSLE_VX:
+  case RISCV::VMSGTU_VX:
+  case RISCV::VMSGT_VX:
+  // 11.9. Vector Integer Min/Max Instructions
+  case RISCV::VMINU_VX:
+  case RISCV::VMIN_VX:
+  case RISCV::VMAXU_VX:
+  case RISCV::VMAX_VX:
+  // 11.10. Vector Single-Width Integer Multiply Instructions
+  case RISCV::VMUL_VX:
+  case RISCV::VMULH_VX:
+  case RISCV::VMULHU_VX:
+  case RISCV::VMULHSU_VX:
+  // 11.11. Vector Integer Divide Instructions
+  case RISCV::VDIVU_VX:
+  case RISCV::VDIV_VX:
+  case RISCV::VREMU_VX:
+  case RISCV::VREM_VX:
+  // 11.12. Vector Widening Integer Multiply Instructions
+  case RISCV::VWMUL_VX:
+  case RISCV::VWMULU_VX:
+  case RISCV::VWMULSU_VX:
+  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+  case RISCV::VMACC_VX:
+  case RISCV::VNMSAC_VX:
+  case RISCV::VMADD_VX:
+  case RISCV::VNMSUB_VX:
+  // 11.14. Vector Widening Integer Multiply-Add Instructions
+  case RISCV::VWMACCU_VX:
+  case RISCV::VWMACC_VX:
+  case RISCV::VWMACCSU_VX:
+  case RISCV::VWMACCUS_VX:
+  // 11.15. Vector Integer Merge Instructions
+  case RISCV::VMERGE_VXM:
+  // 11.16. Vector Integer Move Instructions
+  case RISCV::VMV_V_X:
+  // 12.1. Vector Single-Width Saturating Add and Subtract
+  case RISCV::VSADDU_VX:
+  case RISCV::VSADD_VX:
+  case RISCV::VSSUBU_VX:
+  case RISCV::VSSUB_VX:
+  // 12.2. Vector Single-Width Averaging Add and Subtract
+  case RISCV::VAADDU_VX:
+  case RISCV::VAADD_VX:
+  case RISCV::VASUBU_VX:
+  case RISCV::VASUB_VX:
+  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+  case RISCV::VSMUL_VX:
+  // 16.1. Integer Scalar Move Instructions
+  case RISCV::VMV_S_X:
+    return (1 << Log2SEW) <= Bits;
+  }
+}
+
 // Encode VTYPE into the binary format used by the the VSETVLI instruction which
 // is used by our MC layer representation.
 //
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 20ff26a39dc3b..222d4e9eef674 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -241,6 +241,11 @@ static inline bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc) {
          Desc.getOperandConstraint(Desc.getNumDefs(), MCOI::TIED_TO) == 0;
 }
 
+// Returns true if the .vx vector instruction \p Opcode only uses the lower \p
+// Bits for a given SEW.
+bool vectorInstUsesNBitsOfScalarOp(uint16_t Opcode, unsigned Bits,
+                                   unsigned Log2SEW);
+
 // RISC-V Specific Machine Operand Flags
 enum {
   MO_None = 0,
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 140473c595bbe..0b6de2b207ec3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2782,8 +2782,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
 
     // TODO: Add more opcodes?
     switch (User->getMachineOpcode()) {
-    default:
+    default: {
+      if (const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
+              RISCVVPseudosTable::getPseudoInfo(User->getMachineOpcode())) {
+
+        const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
+        if (!RISCVII::hasSEWOp(MCID.TSFlags))
+          return false;
+        assert(RISCVII::hasVLOp(MCID.TSFlags));
+
+        bool HasGlueOp = User->getGluedNode() != nullptr;
+        unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
+        bool HasChainOp =
+            User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
+        bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+        unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp -
+                         HasGlueOp - 2;
+        const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
+
+        if (UI.getOperandNo() == VLIdx)
+          return false;
+        if (RISCVII::vectorInstUsesNBitsOfScalarOp(PseudoInfo->BaseInstr, Bits,
+                                                   Log2SEW))
+          break;
+      }
       return false;
+    }
     case RISCV::ADDW:
     case RISCV::ADDIW:
     case RISCV::SUBW:
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index bd294c669735f..1e4ce843bd913 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -106,8 +106,23 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
 
     unsigned OpIdx = UserOp.getOperandNo();
 
     switch (UserMI->getOpcode()) {
-    default:
+    default: {
+      if (const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
+              RISCVVPseudosTable::getPseudoInfo(UserMI->getOpcode())) {
+        const MCInstrDesc &MCID = UserMI->getDesc();
+        if (!RISCVII::hasSEWOp(MCID.TSFlags))
+          return false;
+        assert(RISCVII::hasVLOp(MCID.TSFlags));
+        const unsigned Log2SEW =
+            UserMI->getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
+        if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
+          return false;
+        if (RISCVII::vectorInstUsesNBitsOfScalarOp(PseudoInfo->BaseInstr,
+                                                   Bits, Log2SEW))
+          break;
+      }
       return false;
+    }
     case RISCV::ADDIW:
     case RISCV::ADDW:
diff --git
a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll index e3a878052ee19..b3f561a52f411 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s ; These tests check that the scalable-vector version of this series of ; instructions does not get into an infinite DAGCombine loop. This was @@ -14,26 +12,15 @@ ; a constant SPLAT_VECTOR didn't follow suit. define <2 x i16> @fixedlen(<2 x i32> %x) { -; RV32-LABEL: fixedlen: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 16 -; RV32-NEXT: lui a0, 1048568 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; RV32-NEXT: vnsrl.wi v8, v8, 0 -; RV32-NEXT: ret -; -; RV64-LABEL: fixedlen: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vsrl.vi v8, v8, 16 -; RV64-NEXT: lui a0, 131071 -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: vand.vx v8, v8, a0 -; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; RV64-NEXT: vnsrl.wi v8, v8, 0 -; RV64-NEXT: ret +; CHECK-LABEL: fixedlen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsrl.vi v8, v8, 16 +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: ret %v41 = insertelement <2 x i32> poison, i32 16, i32 0 %v42 = shufflevector <2 x i32> %v41, <2 x i32> poison, <2 x i32> zeroinitializer %v43 = lshr <2 x i32> %x, %v42 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 3167bcf26837b..25177734325ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -1,71 +1,39 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV32 -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV64 +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s ; Integers define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { -; RV32-LABEL: vector_deinterleave_load_v16i1_v32i1: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vlm.v v0, (a0) -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v10, v8, 1, v0 -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vv v11, v9, v9 -; RV32-NEXT: vrgather.vv v9, v10, v11 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-NEXT: vadd.vi v12, v11, -16 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, 
-256 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t -; RV32-NEXT: vmsne.vi v9, v9, 0 -; RV32-NEXT: vadd.vi v12, v11, 1 -; RV32-NEXT: vrgather.vv v13, v10, v12 -; RV32-NEXT: vadd.vi v10, v11, -15 -; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t -; RV32-NEXT: vmsne.vi v8, v13, 0 -; RV32-NEXT: vmv.v.v v0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vector_deinterleave_load_v16i1_v32i1: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vlm.v v0, (a0) -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v10, v8, 1, v0 -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vadd.vv v11, v9, v9 -; RV64-NEXT: vrgather.vv v9, v10, v11 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-NEXT: vadd.vi v12, v11, -16 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t -; RV64-NEXT: vmsne.vi v9, v9, 0 -; RV64-NEXT: vadd.vi v12, v11, 1 -; RV64-NEXT: vrgather.vv v13, v10, v12 -; RV64-NEXT: vadd.vi v10, v11, -15 -; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t -; RV64-NEXT: vmsne.vi v8, v13, 0 -; RV64-NEXT: vmv.v.v v0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vector_deinterleave_load_v16i1_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v11, v9, v9 +; CHECK-NEXT: vrgather.vv v9, v10, v11 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vadd.vi v12, v11, -16 +; CHECK-NEXT: li a0, -256 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t +; CHECK-NEXT: vmsne.vi v9, v9, 0 +; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v13, v10, v12 +; CHECK-NEXT: vadd.vi v10, v11, -15 +; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t +; CHECK-NEXT: vmsne.vi v8, v13, 0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: ret %vec = load <32 x i1>, ptr %p %retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec) ret {<16 x i1>, <16 x i1>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index f0a359c13ce5d..742002bda8a9e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -585,8 +585,7 @@ define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> % ; ; RV64-LABEL: vpreduce_umax_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -626,8 +625,7 @@ define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x 
i32> %v, <2 x i1> % ; ; RV64-LABEL: vpreduce_umin_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -727,8 +725,7 @@ define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> % ; ; RV64-LABEL: vpreduce_umax_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -768,8 +765,7 @@ define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> % ; ; RV64-LABEL: vpreduce_umin_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index be8281181cd2d..82c9f405c2392 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256 -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-BITS-UNKNOWN +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-BITS-256 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-BITS-512 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-BITS-UNKNOWN +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-BITS-256 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-BITS-512 ; ; VECTOR_REVERSE - masks @@ -65,7 +65,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr 
a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 @@ -82,7 +82,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 @@ -97,7 +97,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 @@ -163,7 +163,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0 @@ -180,7 +180,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 @@ -195,7 +195,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 @@ -257,7 +257,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v10 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 @@ -273,7 +273,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 ; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 @@ -287,7 +287,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) { ; RV64-BITS-512-NEXT: vmv.v.i v8, 0 ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 ; 
RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 @@ -353,7 +353,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 @@ -370,7 +370,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10 @@ -385,7 +385,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0 ; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10 @@ -451,7 +451,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 @@ -468,7 +468,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12 @@ -483,7 +483,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) { ; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v12, v12, a0 ; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12 @@ -552,7 +552,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 @@ -574,7 +574,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0 ; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16 @@ -586,7 +586,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) { ; RV64-BITS-512: # %bb.0: ; 
RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 @@ -650,7 +650,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 @@ -663,7 +663,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 3 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -675,7 +675,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 3 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -728,7 +728,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 @@ -741,7 +741,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 2 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -753,7 +753,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 2 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -806,7 +806,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v9 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0 @@ -819,7 +819,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: srli a0, a0, 1 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -831,7 +831,7 @@ 
define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: srli a0, a0, 1 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -880,7 +880,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) { ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i8: ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v10 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 @@ -892,7 +892,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) { ; RV64-BITS-256-LABEL: reverse_nxv8i8: ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV64-BITS-256-NEXT: vid.v v9 ; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0 @@ -903,7 +903,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) { ; RV64-BITS-512-LABEL: reverse_nxv8i8: ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV64-BITS-512-NEXT: vid.v v9 ; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0 @@ -956,7 +956,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v12 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0 @@ -969,7 +969,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 1 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; RV64-BITS-256-NEXT: vid.v v10 ; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0 @@ -981,7 +981,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 1 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; RV64-BITS-512-NEXT: vid.v v10 ; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0 @@ -1034,7 +1034,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 @@ -1047,7 +1047,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 2 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli 
a1, zero, e8, m4, ta, ma ; RV64-BITS-256-NEXT: vid.v v12 ; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0 @@ -1059,7 +1059,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v12 ; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0 @@ -1114,7 +1114,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) { ; RV64-BITS-UNKNOWN: # %bb.0: ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 -; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 +; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0 @@ -1128,7 +1128,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) { ; RV64-BITS-256: # %bb.0: ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: slli a0, a0, 3 -; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: addiw a0, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 @@ -1140,7 +1140,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) { ; RV64-BITS-512: # %bb.0: ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 -; RV64-BITS-512-NEXT: addi a0, a0, -1 +; RV64-BITS-512-NEXT: addiw a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0 @@ -1153,175 +1153,305 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) { } define <vscale x 1 x i16> @reverse_nxv1i16(<vscale x 1 x i16> %a) { -; CHECK-LABEL: reverse_nxv1i16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv1i16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv1i16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 1 x i16> @llvm.experimental.vector.reverse.nxv1i16(<vscale x 1 x i16> %a) ret <vscale x 1 x i16> %res } define <vscale x 2 x i16> @reverse_nxv2i16(<vscale x 2 x i16> %a) { -; CHECK-LABEL: reverse_nxv2i16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv2i16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 2 +; 
RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv2i16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> %a) ret <vscale x 2 x i16> %res } define <vscale x 4 x i16> @reverse_nxv4i16(<vscale x 4 x i16> %a) { -; CHECK-LABEL: reverse_nxv4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv4i16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv4i16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> %a) ret <vscale x 4 x i16> %res } define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) { -; CHECK-LABEL: reverse_nxv8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv8i16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vx v12, v10, a0 +; RV32-NEXT: vrgather.vv v10, v8, v12 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv8i16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV64-NEXT: vid.v v10 +; RV64-NEXT: vrsub.vx v12, v10, a0 +; RV64-NEXT: vrgather.vv v10, v8, v12 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret %res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a) ret <vscale x 8 x i16> %res } define <vscale x 16 x i16> @reverse_nxv16i16(<vscale x 16 x i16> %a) { -; CHECK-LABEL: reverse_nxv16i16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv16i16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; 
RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV32-NEXT: vid.v v12 +; RV32-NEXT: vrsub.vx v16, v12, a0 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv16i16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV64-NEXT: vid.v v12 +; RV64-NEXT: vrsub.vx v16, v12, a0 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret %res = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> %a) ret <vscale x 16 x i16> %res } define <vscale x 32 x i16> @reverse_nxv32i16(<vscale x 32 x i16> %a) { -; CHECK-LABEL: reverse_nxv32i16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv32i16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-NEXT: vid.v v16 +; RV32-NEXT: vrsub.vx v24, v16, a0 +; RV32-NEXT: vrgather.vv v16, v8, v24 +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv32i16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-NEXT: vid.v v16 +; RV64-NEXT: vrsub.vx v24, v16, a0 +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret %res = call <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16> %a) ret <vscale x 32 x i16> %res } define <vscale x 1 x i32> @reverse_nxv1i32(<vscale x 1 x i32> %a) { -; CHECK-LABEL: reverse_nxv1i32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv1i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv1i32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32> %a) ret <vscale x 1 x i32> %res } define <vscale x 2 x i32> @reverse_nxv2i32(<vscale x 2 x i32> %a) { -; CHECK-LABEL: reverse_nxv2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv2i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr 
a0, vlenb +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv2i32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32> %a) ret <vscale x 2 x i32> %res } define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) { -; CHECK-LABEL: reverse_nxv4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv4i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vx v12, v10, a0 +; RV32-NEXT: vrgather.vv v10, v8, v12 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv4i32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV64-NEXT: vid.v v10 +; RV64-NEXT: vrsub.vx v12, v10, a0 +; RV64-NEXT: vrgather.vv v10, v8, v12 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret %res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a) ret <vscale x 4 x i32> %res } define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) { -; CHECK-LABEL: reverse_nxv8i32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv8i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vid.v v12 +; RV32-NEXT: vrsub.vx v16, v12, a0 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv8i32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV64-NEXT: vid.v v12 +; RV64-NEXT: vrsub.vx v16, v12, a0 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret %res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a) ret <vscale x 8 x i32> %res } define <vscale x 16 x i32> @reverse_nxv16i32(<vscale x 16 x i32> %a) { -; CHECK-LABEL: reverse_nxv16i32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv16i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vid.v v16 +; RV32-NEXT: vrsub.vx v24, v16, a0 +; RV32-NEXT: vrgather.vv v16, v8, v24 +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv16i32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV64-NEXT: vid.v v16 +; RV64-NEXT: vrsub.vx v24, v16, a0 +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret %res = call <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32> %a) ret <vscale x 16 x i32> %res } @@ -1394,175 +1524,305 @@ define <vscale x 8 x i64> @reverse_nxv8i64(<vscale x 8 x i64> %a) { ; define <vscale x 1 x half> @reverse_nxv1f16(<vscale x 1 x half> %a) { -; CHECK-LABEL: reverse_nxv1f16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv1f16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv1f16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half> %a) ret <vscale x 1 x half> %res } define <vscale x 2 x half> @reverse_nxv2f16(<vscale x 2 x half> %a) { -; CHECK-LABEL: reverse_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv2f16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv2f16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> %a) ret <vscale x 2 x half> %res } define <vscale x 4 x half> @reverse_nxv4f16(<vscale x 4 x half> %a) { -; CHECK-LABEL: reverse_nxv4f16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; 
CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv4f16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv4f16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> %a) ret <vscale x 4 x half> %res } define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) { -; CHECK-LABEL: reverse_nxv8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv8f16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vx v12, v10, a0 +; RV32-NEXT: vrgather.vv v10, v8, v12 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv8f16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV64-NEXT: vid.v v10 +; RV64-NEXT: vrsub.vx v12, v10, a0 +; RV64-NEXT: vrgather.vv v10, v8, v12 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret %res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a) ret <vscale x 8 x half> %res } define <vscale x 16 x half> @reverse_nxv16f16(<vscale x 16 x half> %a) { -; CHECK-LABEL: reverse_nxv16f16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv16f16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV32-NEXT: vid.v v12 +; RV32-NEXT: vrsub.vx v16, v12, a0 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv16f16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; RV64-NEXT: vid.v v12 +; RV64-NEXT: vrsub.vx v16, v12, a0 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret %res = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> %a) ret <vscale x 16 x half> %res } define <vscale x 32 x half> @reverse_nxv32f16(<vscale x 32 x half> %a) { -; CHECK-LABEL: reverse_nxv32f16: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a0 -; 
CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv32f16: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV32-NEXT: vid.v v16 +; RV32-NEXT: vrsub.vx v24, v16, a0 +; RV32-NEXT: vrgather.vv v16, v8, v24 +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv32f16: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; RV64-NEXT: vid.v v16 +; RV64-NEXT: vrsub.vx v24, v16, a0 +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret %res = call <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half> %a) ret <vscale x 32 x half> %res } define <vscale x 1 x float> @reverse_nxv1f32(<vscale x 1 x float> %a) { -; CHECK-LABEL: reverse_nxv1f32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv1f32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv1f32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 3 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float> %a) ret <vscale x 1 x float> %res } define <vscale x 2 x float> @reverse_nxv2f32(<vscale x 2 x float> %a) { -; CHECK-LABEL: reverse_nxv2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v10, v9, a0 -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv2f32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 2 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vx v10, v9, a0 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv2f32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 2 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vx v10, v9, a0 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret %res = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> %a) ret <vscale x 2 x float> %res } define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) { -; CHECK-LABEL: reverse_nxv4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, 
e32, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vx v12, v10, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv4f32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV32-NEXT: vid.v v10 +; RV32-NEXT: vrsub.vx v12, v10, a0 +; RV32-NEXT: vrgather.vv v10, v8, v12 +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv4f32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV64-NEXT: vid.v v10 +; RV64-NEXT: vrsub.vx v12, v10, a0 +; RV64-NEXT: vrgather.vv v10, v8, v12 +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret %res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a) ret <vscale x 4 x float> %res } define <vscale x 8 x float> @reverse_nxv8f32(<vscale x 8 x float> %a) { -; CHECK-LABEL: reverse_nxv8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vrsub.vx v16, v12, a0 -; CHECK-NEXT: vrgather.vv v12, v8, v16 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv8f32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV32-NEXT: vid.v v12 +; RV32-NEXT: vrsub.vx v16, v12, a0 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv8f32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV64-NEXT: vid.v v12 +; RV64-NEXT: vrsub.vx v16, v12, a0 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret %res = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> %a) ret <vscale x 8 x float> %res } define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) { -; CHECK-LABEL: reverse_nxv16f32: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a0 -; CHECK-NEXT: vrgather.vv v16, v8, v24 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret +; RV32-LABEL: reverse_nxv16f32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV32-NEXT: vid.v v16 +; RV32-NEXT: vrsub.vx v24, v16, a0 +; RV32-NEXT: vrgather.vv v16, v8, v24 +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv16f32: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: addiw a0, a0, -1 +; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV64-NEXT: vid.v v16 +; RV64-NEXT: vrsub.vx v24, v16, a0 +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret %res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a) ret <vscale x 16 x float> %res } @@ -1668,221 +1928,77 @@ define <vscale x 6 x i64> @reverse_nxv6i64(<vscale x 6 x i64> %a) { } define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) { -; RV32-BITS-UNKNOWN-LABEL: reverse_nxv12i64: -; 
RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -80 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 80 -; RV32-BITS-UNKNOWN-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32-BITS-UNKNOWN-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4 -; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8 -; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 80 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 4 -; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0 -; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64 -; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV32-BITS-UNKNOWN-NEXT: addi a1, a0, -1 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v24 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v24, a1 -; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v0, v16, v24 -; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v16, v4 -; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v0, v8, v24 -; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v20, v0 -; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 -; RV32-BITS-UNKNOWN-NEXT: addi a1, sp, 64 -; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 -; RV32-BITS-UNKNOWN-NEXT: vs4r.v v4, (a0) -; RV32-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1) -; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0) -; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1) -; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -80 -; RV32-BITS-UNKNOWN-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32-BITS-UNKNOWN-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 80 -; RV32-BITS-UNKNOWN-NEXT: ret -; -; RV32-BITS-256-LABEL: reverse_nxv12i64: -; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: addi sp, sp, -80 -; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 80 -; RV32-BITS-256-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32-BITS-256-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32-BITS-256-NEXT: .cfi_offset ra, -4 -; RV32-BITS-256-NEXT: .cfi_offset s0, -8 -; RV32-BITS-256-NEXT: addi s0, sp, 80 -; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-256-NEXT: csrr a0, vlenb -; RV32-BITS-256-NEXT: slli a0, a0, 4 -; RV32-BITS-256-NEXT: sub sp, sp, a0 -; RV32-BITS-256-NEXT: andi sp, sp, -64 -; RV32-BITS-256-NEXT: csrr a0, vlenb -; RV32-BITS-256-NEXT: addi a1, a0, -1 -; RV32-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-BITS-256-NEXT: vid.v v24 -; RV32-BITS-256-NEXT: vrsub.vx v24, v24, a1 -; RV32-BITS-256-NEXT: vrgather.vv v0, v16, v24 -; RV32-BITS-256-NEXT: vmv4r.v v16, v4 -; RV32-BITS-256-NEXT: vrgather.vv v0, v8, v24 -; RV32-BITS-256-NEXT: vmv4r.v v20, v0 -; RV32-BITS-256-NEXT: slli a0, a0, 3 -; RV32-BITS-256-NEXT: addi a1, sp, 64 -; RV32-BITS-256-NEXT: add a0, a1, a0 -; RV32-BITS-256-NEXT: vs4r.v v4, (a0) -; RV32-BITS-256-NEXT: vs8r.v v16, (a1) -; RV32-BITS-256-NEXT: vl8re64.v v16, (a0) -; RV32-BITS-256-NEXT: vl8re64.v v8, (a1) -; RV32-BITS-256-NEXT: addi sp, s0, -80 -; RV32-BITS-256-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32-BITS-256-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32-BITS-256-NEXT: addi sp, sp, 80 -; RV32-BITS-256-NEXT: ret -; -; RV32-BITS-512-LABEL: reverse_nxv12i64: -; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: addi sp, sp, -80 -; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 80 -; RV32-BITS-512-NEXT: sw ra, 76(sp) # 4-byte Folded Spill -; RV32-BITS-512-NEXT: sw s0, 72(sp) # 4-byte Folded Spill -; RV32-BITS-512-NEXT: .cfi_offset ra, -4 -; RV32-BITS-512-NEXT: .cfi_offset s0, -8 -; RV32-BITS-512-NEXT: addi s0, sp, 80 -; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-512-NEXT: csrr a0, vlenb -; 
RV32-BITS-512-NEXT: slli a0, a0, 4 -; RV32-BITS-512-NEXT: sub sp, sp, a0 -; RV32-BITS-512-NEXT: andi sp, sp, -64 -; RV32-BITS-512-NEXT: csrr a0, vlenb -; RV32-BITS-512-NEXT: addi a1, a0, -1 -; RV32-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV32-BITS-512-NEXT: vid.v v24 -; RV32-BITS-512-NEXT: vrsub.vx v24, v24, a1 -; RV32-BITS-512-NEXT: vrgather.vv v0, v16, v24 -; RV32-BITS-512-NEXT: vmv4r.v v16, v4 -; RV32-BITS-512-NEXT: vrgather.vv v0, v8, v24 -; RV32-BITS-512-NEXT: vmv4r.v v20, v0 -; RV32-BITS-512-NEXT: slli a0, a0, 3 -; RV32-BITS-512-NEXT: addi a1, sp, 64 -; RV32-BITS-512-NEXT: add a0, a1, a0 -; RV32-BITS-512-NEXT: vs4r.v v4, (a0) -; RV32-BITS-512-NEXT: vs8r.v v16, (a1) -; RV32-BITS-512-NEXT: vl8re64.v v16, (a0) -; RV32-BITS-512-NEXT: vl8re64.v v8, (a1) -; RV32-BITS-512-NEXT: addi sp, s0, -80 -; RV32-BITS-512-NEXT: lw ra, 76(sp) # 4-byte Folded Reload -; RV32-BITS-512-NEXT: lw s0, 72(sp) # 4-byte Folded Reload -; RV32-BITS-512-NEXT: addi sp, sp, 80 -; RV32-BITS-512-NEXT: ret -; -; RV64-BITS-UNKNOWN-LABEL: reverse_nxv12i64: -; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -80 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 80 -; RV64-BITS-UNKNOWN-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64-BITS-UNKNOWN-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8 -; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16 -; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 80 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 4 -; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0 -; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64 -; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: addi a1, a0, -1 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v24 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v24, a1 -; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v0, v16, v24 -; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v16, v4 -; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v0, v8, v24 -; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v20, v0 -; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 -; RV64-BITS-UNKNOWN-NEXT: addi a1, sp, 64 -; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 -; RV64-BITS-UNKNOWN-NEXT: vs4r.v v4, (a0) -; RV64-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1) -; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0) -; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1) -; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -80 -; RV64-BITS-UNKNOWN-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; RV64-BITS-UNKNOWN-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 80 -; RV64-BITS-UNKNOWN-NEXT: ret -; -; RV64-BITS-256-LABEL: reverse_nxv12i64: -; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: addi sp, sp, -80 -; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 80 -; RV64-BITS-256-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64-BITS-256-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; RV64-BITS-256-NEXT: .cfi_offset ra, -8 -; RV64-BITS-256-NEXT: .cfi_offset s0, -16 -; RV64-BITS-256-NEXT: addi s0, sp, 80 -; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: slli a0, a0, 4 -; RV64-BITS-256-NEXT: sub sp, sp, a0 -; RV64-BITS-256-NEXT: andi sp, sp, -64 -; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: addi a1, a0, -1 -; RV64-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-BITS-256-NEXT: vid.v v24 -; RV64-BITS-256-NEXT: vrsub.vx v24, v24, a1 -; RV64-BITS-256-NEXT: vrgather.vv v0, v16, v24 -; RV64-BITS-256-NEXT: vmv4r.v v16, v4 -; RV64-BITS-256-NEXT: 
vrgather.vv v0, v8, v24 -; RV64-BITS-256-NEXT: vmv4r.v v20, v0 -; RV64-BITS-256-NEXT: slli a0, a0, 3 -; RV64-BITS-256-NEXT: addi a1, sp, 64 -; RV64-BITS-256-NEXT: add a0, a1, a0 -; RV64-BITS-256-NEXT: vs4r.v v4, (a0) -; RV64-BITS-256-NEXT: vs8r.v v16, (a1) -; RV64-BITS-256-NEXT: vl8re64.v v16, (a0) -; RV64-BITS-256-NEXT: vl8re64.v v8, (a1) -; RV64-BITS-256-NEXT: addi sp, s0, -80 -; RV64-BITS-256-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; RV64-BITS-256-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; RV64-BITS-256-NEXT: addi sp, sp, 80 -; RV64-BITS-256-NEXT: ret -; -; RV64-BITS-512-LABEL: reverse_nxv12i64: -; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: addi sp, sp, -80 -; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 80 -; RV64-BITS-512-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64-BITS-512-NEXT: sd s0, 64(sp) # 8-byte Folded Spill -; RV64-BITS-512-NEXT: .cfi_offset ra, -8 -; RV64-BITS-512-NEXT: .cfi_offset s0, -16 -; RV64-BITS-512-NEXT: addi s0, sp, 80 -; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: slli a0, a0, 4 -; RV64-BITS-512-NEXT: sub sp, sp, a0 -; RV64-BITS-512-NEXT: andi sp, sp, -64 -; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: addi a1, a0, -1 -; RV64-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-BITS-512-NEXT: vid.v v24 -; RV64-BITS-512-NEXT: vrsub.vx v24, v24, a1 -; RV64-BITS-512-NEXT: vrgather.vv v0, v16, v24 -; RV64-BITS-512-NEXT: vmv4r.v v16, v4 -; RV64-BITS-512-NEXT: vrgather.vv v0, v8, v24 -; RV64-BITS-512-NEXT: vmv4r.v v20, v0 -; RV64-BITS-512-NEXT: slli a0, a0, 3 -; RV64-BITS-512-NEXT: addi a1, sp, 64 -; RV64-BITS-512-NEXT: add a0, a1, a0 -; RV64-BITS-512-NEXT: vs4r.v v4, (a0) -; RV64-BITS-512-NEXT: vs8r.v v16, (a1) -; RV64-BITS-512-NEXT: vl8re64.v v16, (a0) -; RV64-BITS-512-NEXT: vl8re64.v v8, (a1) -; RV64-BITS-512-NEXT: addi sp, s0, -80 -; RV64-BITS-512-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; RV64-BITS-512-NEXT: ld s0, 64(sp) # 8-byte Folded Reload -; RV64-BITS-512-NEXT: addi sp, sp, 80 -; RV64-BITS-512-NEXT: ret +; RV32-LABEL: reverse_nxv12i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -80 +; RV32-NEXT: .cfi_def_cfa_offset 80 +; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: .cfi_offset s0, -8 +; RV32-NEXT: addi s0, sp, 80 +; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: addi a1, a0, -1 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vid.v v24 +; RV32-NEXT: vrsub.vx v24, v24, a1 +; RV32-NEXT: vrgather.vv v0, v16, v24 +; RV32-NEXT: vmv4r.v v16, v4 +; RV32-NEXT: vrgather.vv v0, v8, v24 +; RV32-NEXT: vmv4r.v v20, v0 +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: addi a1, sp, 64 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: vs4r.v v4, (a0) +; RV32-NEXT: vs8r.v v16, (a1) +; RV32-NEXT: vl8re64.v v16, (a0) +; RV32-NEXT: vl8re64.v v8, (a1) +; RV32-NEXT: addi sp, s0, -80 +; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 80 +; RV32-NEXT: ret +; +; RV64-LABEL: reverse_nxv12i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: .cfi_def_cfa_offset 80 +; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: .cfi_offset s0, -16 +; RV64-NEXT: addi s0, sp, 80 +; RV64-NEXT: 
.cfi_def_cfa s0, 0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: addi a1, a0, -1 +; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vid.v v24 +; RV64-NEXT: vrsub.vx v24, v24, a1 +; RV64-NEXT: vrgather.vv v0, v16, v24 +; RV64-NEXT: vmv4r.v v16, v4 +; RV64-NEXT: vrgather.vv v0, v8, v24 +; RV64-NEXT: vmv4r.v v20, v0 +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: addi a1, sp, 64 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: vs4r.v v4, (a0) +; RV64-NEXT: vs8r.v v16, (a1) +; RV64-NEXT: vl8re64.v v16, (a0) +; RV64-NEXT: vl8re64.v v8, (a1) +; RV64-NEXT: addi sp, s0, -80 +; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 80 +; RV64-NEXT: ret %res = call <vscale x 12 x i64> @llvm.experimental.vector.reverse.nxv12i64(<vscale x 12 x i64> %a) ret <vscale x 12 x i64> %res } diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index 443fe93a618c5..56d98981947c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -32,13 +32,11 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: addiw a1, a0, -1 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: slli a0, a0, 31 +; CHECK-NEXT: vmv.v.x v9, a1 ; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 @@ -116,13 +114,11 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: addiw a1, a0, -1 ; CHECK-NEXT: vsll.vv v12, v8, v10 ; CHECK-NEXT: vsra.vv v14, v12, v10 ; CHECK-NEXT: vmsne.vv v10, v8, v14 -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: slli a0, a0, 31 +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index c674670d8d030..f7ccf2c32cde0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -1,65 +1,36 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck --check-prefixes=CHECK,RV32 %s -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck --check-prefixes=CHECK,RV64 %s +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s ; Integers define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { -; RV32-LABEL: vector_deinterleave_v16i1_v32i1: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v10, v8, 1, v0 -; RV32-NEXT: vid.v v9 -; RV32-NEXT: vadd.vv v11, v9, v9 -; RV32-NEXT: vrgather.vv v9, v10, v11 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, 
ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-NEXT: vadd.vi v12, v11, -16 -; RV32-NEXT: lui a0, 16 -; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t -; RV32-NEXT: vmsne.vi v9, v9, 0 -; RV32-NEXT: vadd.vi v12, v11, 1 -; RV32-NEXT: vrgather.vv v13, v10, v12 -; RV32-NEXT: vadd.vi v10, v11, -15 -; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t -; RV32-NEXT: vmsne.vi v8, v13, 0 -; RV32-NEXT: vmv.v.v v0, v9 -; RV32-NEXT: ret -; -; RV64-LABEL: vector_deinterleave_v16i1_v32i1: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v10, v8, 1, v0 -; RV64-NEXT: vid.v v9 -; RV64-NEXT: vadd.vv v11, v9, v9 -; RV64-NEXT: vrgather.vv v9, v10, v11 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-NEXT: vadd.vi v12, v11, -16 -; RV64-NEXT: lui a0, 16 -; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t -; RV64-NEXT: vmsne.vi v9, v9, 0 -; RV64-NEXT: vadd.vi v12, v11, 1 -; RV64-NEXT: vrgather.vv v13, v10, v12 -; RV64-NEXT: vadd.vi v10, v11, -15 -; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t -; RV64-NEXT: vmsne.vi v8, v13, 0 -; RV64-NEXT: vmv.v.v v0, v9 -; RV64-NEXT: ret +; CHECK-LABEL: vector_deinterleave_v16i1_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v11, v9, v9 +; CHECK-NEXT: vrgather.vv v9, v10, v11 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vadd.vi v12, v11, -16 +; CHECK-NEXT: li a0, -256 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t +; CHECK-NEXT: vmsne.vi v9, v9, 0 +; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v13, v10, v12 +; CHECK-NEXT: vadd.vi v10, v11, -15 +; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t +; CHECK-NEXT: vmsne.vi v8, v13, 0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: ret %retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec) ret {<16 x i1>, <16 x i1>} %retval } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll index 06f5d39622da8..0fda7909df313 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s ; Test that the prepareSREMEqFold optimization doesn't crash on 
scalable ; vector types. @@ -60,21 +60,12 @@ define <vscale x 1 x i32> @vmulh_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) { } define <vscale x 1 x i32> @vmulh_vi_nxv1i32_0(<vscale x 1 x i32> %va) { -; RV32-LABEL: vmulh_vi_nxv1i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulh_vi_nxv1i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulh_vi_nxv1i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer %vb = sext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64> @@ -141,21 +132,12 @@ define <vscale x 2 x i32> @vmulh_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) { } define <vscale x 2 x i32> @vmulh_vi_nxv2i32_0(<vscale x 2 x i32> %va) { -; RV32-LABEL: vmulh_vi_nxv2i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulh_vi_nxv2i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulh_vi_nxv2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer %vb = sext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64> @@ -222,21 +204,12 @@ define <vscale x 4 x i32> @vmulh_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) { } define <vscale x 4 x i32> @vmulh_vi_nxv4i32_0(<vscale x 4 x i32> %va) { -; RV32-LABEL: vmulh_vi_nxv4i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulh_vi_nxv4i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulh_vi_nxv4i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer %vb = sext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64> @@ -303,21 +276,12 @@ define <vscale x 8 x i32> @vmulh_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) { } define <vscale x 8 x i32> @vmulh_vi_nxv8i32_0(<vscale x 8 x i32> %va) { -; RV32-LABEL: vmulh_vi_nxv8i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vmulh.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulh_vi_nxv8i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; 
RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulh.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulh_vi_nxv8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer %vb = sext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll index 186d56b1293db..5354c17fd2a7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll @@ -37,21 +37,12 @@ define <vscale x 1 x i32> @vmulhu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) { } define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) { -; RV32-LABEL: vmulhu_vi_nxv1i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv1i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv1i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer %vb = zext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64> @@ -124,21 +115,12 @@ define <vscale x 2 x i32> @vmulhu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) { } define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) { -; RV32-LABEL: vmulhu_vi_nxv2i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv2i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv2i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer %vb = zext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64> @@ -211,21 +193,12 @@ define <vscale x 4 x i32> @vmulhu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) { } define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) { -; RV32-LABEL: vmulhu_vi_nxv4i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv4i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv4i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vmulhu.vx 
v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer %vb = zext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64> @@ -298,21 +271,12 @@ define <vscale x 8 x i32> @vmulhu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) { } define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) { -; RV32-LABEL: vmulhu_vi_nxv8i32_0: -; RV32: # %bb.0: -; RV32-NEXT: li a0, -7 -; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32-NEXT: vmulhu.vx v8, v8, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: vmulhu_vi_nxv8i32_0: -; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: addi a0, a0, -7 -; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64-NEXT: vmulhu.vx v8, v8, a0 -; RV64-NEXT: ret +; CHECK-LABEL: vmulhu_vi_nxv8i32_0: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, -7 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret %head1 = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0 %splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer %vb = zext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll index ba7f3e801aa07..6a8fe57f23f66 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -852,8 +852,7 @@ define signext i32 @vpreduce_umax_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, ; ; RV64-LABEL: vpreduce_umax_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -893,8 +892,7 @@ define signext i32 @vpreduce_umin_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, ; ; RV64-LABEL: vpreduce_umin_nxv1i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma @@ -994,8 +992,7 @@ define signext i32 @vpreduce_umax_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, ; ; RV64-LABEL: vpreduce_umax_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -1035,8 +1032,7 @@ define signext i32 @vpreduce_umin_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, ; ; RV64-LABEL: vpreduce_umin_nxv2i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -1136,8 +1132,7 @@ define signext i32 @vpreduce_umax_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, ; ; RV64-LABEL: vpreduce_umax_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma @@ -1182,8 +1177,7 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> % ; RV64-NEXT: srli a2, a3, 2 ; RV64-NEXT: 
vsetvli a4, zero, e8, mf2, ta, ma ; RV64-NEXT: vslidedown.vx v24, v0, a2 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a2, a0, 32 +; RV64-NEXT: andi a2, a0, -1 ; RV64-NEXT: slli a3, a3, 1 ; RV64-NEXT: sub a0, a1, a3 ; RV64-NEXT: sltu a4, a1, a0 @@ -1235,8 +1229,7 @@ define signext i32 @vpreduce_umin_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, ; ; RV64-LABEL: vpreduce_umin_nxv4i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma