diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f89f300a4e9e5..c5c75ae19daa9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7895,6 +7895,30 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   }
 
+  // If we're compiling for an exact VLEN value and we have a known
+  // constant index, we can always perform the extract in m1 (or
+  // smaller) as we can determine the register corresponding to
+  // the index in the register group.
+  const unsigned MinVLen = Subtarget.getRealMinVLen();
+  const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
+      IdxC && MinVLen == MaxVLen &&
+      VecVT.getSizeInBits().getKnownMinValue() > MinVLen) {
+    MVT M1VT = getLMUL1VT(ContainerVT);
+    unsigned OrigIdx = IdxC->getZExtValue();
+    EVT ElemVT = VecVT.getVectorElementType();
+    unsigned ElemSize = ElemVT.getSizeInBits().getKnownMinValue();
+    unsigned ElemsPerVReg = MinVLen / ElemSize;
+    unsigned RemIdx = OrigIdx % ElemsPerVReg;
+    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
+    unsigned ExtractIdx =
+        SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
+    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
+                      DAG.getVectorIdxConstant(ExtractIdx, DL));
+    Idx = DAG.getVectorIdxConstant(RemIdx, DL);
+    ContainerVT = M1VT;
+  }
+
   // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
   // contains our index.
   std::optional<unsigned> MaxIdx;
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
index 34dcce3fe058b..9df0871046959 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -697,6 +697,27 @@ define i64 @extractelt_nxv8i64_imm(<vscale x 8 x i64> %v) {
   ret i64 %r
 }
 
+define i64 @extractelt_nxv8i64_2_exact_vlen(<vscale x 8 x i64> %v) vscale_range(2,2) {
+; CHECK-LABEL: extractelt_nxv8i64_2_exact_vlen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a0, v9
+; CHECK-NEXT:    ret
+  %r = extractelement <vscale x 8 x i64> %v, i32 2
+  ret i64 %r
+}
+
+define i64 @extractelt_nxv8i64_15_exact_vlen(<vscale x 8 x i64> %v) vscale_range(2,2) {
+; CHECK-LABEL: extractelt_nxv8i64_15_exact_vlen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v15, 1
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+  %r = extractelement <vscale x 8 x i64> %v, i32 15
+  ret i64 %r
+}
+
 define i64 @extractelt_nxv8i64_idx(<vscale x 8 x i64> %v, i32 zeroext %idx) {
 ; CHECK-LABEL: extractelt_nxv8i64_idx:
 ; CHECK:       # %bb.0:
@@ -860,10 +881,10 @@ define i64 @extractelt_nxv16i64_neg1(<vscale x 16 x i64> %v) {
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    addi a2, a2, -1
 ; CHECK-NEXT:    vs8r.v v16, (a3)
-; CHECK-NEXT:    bltu a2, a1, .LBB72_2
+; CHECK-NEXT:    bltu a2, a1, .LBB74_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:  .LBB72_2:
+; CHECK-NEXT:  .LBB74_2:
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:    ld a0, 0(a0)
@@ -893,10 +914,10 @@ define i64 @extractelt_nxv16i64_idx(<vscale x 16 x i64> %v, i32 zeroext %idx) {
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a2, a1, 1
 ; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    bltu a0, a2, .LBB74_2
+; CHECK-NEXT:    bltu a0, a2, .LBB76_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB74_2:
+; CHECK-NEXT:  .LBB76_2:
 ; CHECK-NEXT:    addi sp, sp, -80
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 95c1beb284c40..d3c4b0f5cddd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1137,3 +1137,31 @@ define float @extractelt_fdiv_v4f32(<4 x float> %x) {
   %ext = extractelement <4 x float> %bo, i32 2
   ret float %ext
 }
+
+define i32 @extractelt_v16i32_idx7_exact_vlen(ptr %x) nounwind vscale_range(2,2) {
+; CHECK-LABEL: extractelt_v16i32_idx7_exact_vlen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v9, 3
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+  %a = load <16 x i32>, ptr %x
+  %b = extractelement <16 x i32> %a, i32 7
+  ret i32 %b
+}
+
+define i32 @extractelt_v16i32_idx15_exact_vlen(ptr %x) nounwind vscale_range(2,2) {
+; CHECK-LABEL: extractelt_v16i32_idx15_exact_vlen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v11, 3
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+  %a = load <16 x i32>, ptr %x
+  %b = extractelement <16 x i32> %a, i32 15
+  ret i32 %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
index f3570495600f3..e5bbbd661e6a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll
@@ -1084,3 +1084,133 @@ define i64 @explode_16xi64(<16 x i64> %v) {
   %add14 = add i64 %add13, %e15
   ret i64 %add14
 }
+
+define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) {
+; RV32-LABEL: explode_16xi32_exact_vlen:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 2
+; RV32-NEXT:    vmv.x.s a0, v12
+; RV32-NEXT:    vslidedown.vi v12, v8, 3
+; RV32-NEXT:    vmv.x.s a1, v12
+; RV32-NEXT:    vmv.x.s a2, v9
+; RV32-NEXT:    vslidedown.vi v12, v9, 1
+; RV32-NEXT:    vmv.x.s a3, v12
+; RV32-NEXT:    vslidedown.vi v12, v9, 2
+; RV32-NEXT:    vmv.x.s a4, v12
+; RV32-NEXT:    vslidedown.vi v9, v9, 3
+; RV32-NEXT:    vmv.x.s a5, v9
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    vslidedown.vi v9, v10, 1
+; RV32-NEXT:    vmv.x.s a7, v9
+; RV32-NEXT:    vslidedown.vi v9, v10, 2
+; RV32-NEXT:    vmv.x.s t0, v9
+; RV32-NEXT:    vslidedown.vi v9, v10, 3
+; RV32-NEXT:    vmv.x.s t1, v9
+; RV32-NEXT:    vmv.x.s t2, v11
+; RV32-NEXT:    vslidedown.vi v9, v11, 1
+; RV32-NEXT:    vmv.x.s t3, v9
+; RV32-NEXT:    vslidedown.vi v9, v11, 2
+; RV32-NEXT:    vmv.x.s t4, v9
+; RV32-NEXT:    vslidedown.vi v9, v11, 3
+; RV32-NEXT:    vmv.x.s t5, v9
+; RV32-NEXT:    vmv.s.x v9, zero
+; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT:    vredxor.vs v8, v8, v9
+; RV32-NEXT:    vmv.x.s t6, v8
+; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, t6, a0
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, a2, a4
+; RV32-NEXT:    add a0, a0, a2
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, a5, a7
+; RV32-NEXT:    add a5, a5, t0
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add t1, t1, t2
+; RV32-NEXT:    add t1, t1, t3
+; RV32-NEXT:    add t1, t1, t4
+; RV32-NEXT:    add t1, t1, t5
+; RV32-NEXT:    add a0, a0, t1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: explode_16xi32_exact_vlen:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 2
+; RV64-NEXT:    vmv.x.s a0, v12
+; RV64-NEXT:    vslidedown.vi v12, v8, 3
+; RV64-NEXT:    vmv.x.s a1, v12
+; RV64-NEXT:    vmv.x.s a2, v9
+; RV64-NEXT:    vslidedown.vi v12, v9, 1
+; RV64-NEXT:    vmv.x.s a3, v12
+; RV64-NEXT:    vslidedown.vi v12, v9, 2
+; RV64-NEXT:    vmv.x.s a4, v12
+; RV64-NEXT:    vslidedown.vi v9, v9, 3
+; RV64-NEXT:    vmv.x.s a5, v9
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    vslidedown.vi v9, v10, 1
+; RV64-NEXT:    vmv.x.s a7, v9
+; RV64-NEXT:    vslidedown.vi v9, v10, 2
+; RV64-NEXT:    vmv.x.s t0, v9
+; RV64-NEXT:    vslidedown.vi v9, v10, 3
+; RV64-NEXT:    vmv.x.s t1, v9
+; RV64-NEXT:    vmv.x.s t2, v11
+; RV64-NEXT:    vslidedown.vi v9, v11, 1
+; RV64-NEXT:    vmv.x.s t3, v9
+; RV64-NEXT:    vslidedown.vi v9, v11, 2
+; RV64-NEXT:    vmv.x.s t4, v9
+; RV64-NEXT:    vslidedown.vi v9, v11, 3
+; RV64-NEXT:    vmv.x.s t5, v9
+; RV64-NEXT:    vmv.s.x v9, zero
+; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT:    vredxor.vs v8, v8, v9
+; RV64-NEXT:    vmv.x.s t6, v8
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    add a0, t6, a0
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, a2, a4
+; RV64-NEXT:    add a0, a0, a2
+; RV64-NEXT:    add a5, a5, a6
+; RV64-NEXT:    add a5, a5, a7
+; RV64-NEXT:    add a5, a5, t0
+; RV64-NEXT:    add a0, a0, a5
+; RV64-NEXT:    add t1, t1, t2
+; RV64-NEXT:    add t1, t1, t3
+; RV64-NEXT:    add t1, t1, t4
+; RV64-NEXT:    add t1, t1, t5
+; RV64-NEXT:    addw a0, a0, t1
+; RV64-NEXT:    ret
+  %e0 = extractelement <16 x i32> %v, i32 0
+  %e1 = extractelement <16 x i32> %v, i32 1
+  %e2 = extractelement <16 x i32> %v, i32 2
+  %e3 = extractelement <16 x i32> %v, i32 3
+  %e4 = extractelement <16 x i32> %v, i32 4
+  %e5 = extractelement <16 x i32> %v, i32 5
+  %e6 = extractelement <16 x i32> %v, i32 6
+  %e7 = extractelement <16 x i32> %v, i32 7
+  %e8 = extractelement <16 x i32> %v, i32 8
+  %e9 = extractelement <16 x i32> %v, i32 9
+  %e10 = extractelement <16 x i32> %v, i32 10
+  %e11 = extractelement <16 x i32> %v, i32 11
+  %e12 = extractelement <16 x i32> %v, i32 12
+  %e13 = extractelement <16 x i32> %v, i32 13
+  %e14 = extractelement <16 x i32> %v, i32 14
+  %e15 = extractelement <16 x i32> %v, i32 15
+  %add0 = xor i32 %e0, %e1
+  %add1 = add i32 %add0, %e2
+  %add2 = add i32 %add1, %e3
+  %add3 = add i32 %add2, %e4
+  %add4 = add i32 %add3, %e5
+  %add5 = add i32 %add4, %e6
+  %add6 = add i32 %add5, %e7
+  %add7 = add i32 %add6, %e8
+  %add8 = add i32 %add7, %e9
+  %add9 = add i32 %add8, %e10
+  %add10 = add i32 %add9, %e11
+  %add11 = add i32 %add10, %e12
+  %add12 = add i32 %add11, %e13
+  %add13 = add i32 %add12, %e14
+  %add14 = add i32 %add13, %e15
+  ret i32 %add14
+}
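The lowering change above rests on a simple decomposition: with an exact VLEN, element OrigIdx of an LMUL > 1 value lives in vector register OrigIdx / ElemsPerVReg of the register group, at offset OrigIdx % ElemsPerVReg within that single register, so the extract can always be done at m1 or smaller. The standalone C++ sketch below (not part of the patch; M1Extract and decomposeExactVLEN are names invented for illustration) reproduces that arithmetic and checks it against two of the test cases, assuming VLEN=128 as implied by vscale_range(2,2).

// Standalone sketch: given an exact VLEN, an element width, and a constant
// extract index, compute which m1 register of the LMUL group holds the
// element and the element's offset within that register. Mirrors the
// OrigIdx / ElemsPerVReg and OrigIdx % ElemsPerVReg arithmetic in the
// lowering above; illustrative only.
#include <cstdio>

struct M1Extract {
  unsigned SubRegIdx; // which vreg within the register group (0 = first)
  unsigned RemIdx;    // element offset within that single vreg
};

static M1Extract decomposeExactVLEN(unsigned VLenBits, unsigned ElemBits,
                                    unsigned OrigIdx) {
  unsigned ElemsPerVReg = VLenBits / ElemBits;
  return {OrigIdx / ElemsPerVReg, OrigIdx % ElemsPerVReg};
}

int main() {
  // VLEN=128, <16 x i32>, index 15: fourth vreg of the group (v11 when the
  // group starts at v8), offset 3 -- matching "vslidedown.vi v8, v11, 3" in
  // extractelt_v16i32_idx15_exact_vlen.
  M1Extract A = decomposeExactVLEN(128, 32, 15);
  std::printf("i32 idx 15: subreg %u, offset %u\n", A.SubRegIdx, A.RemIdx);

  // VLEN=128, <vscale x 8 x i64>, index 2: second vreg (v9), offset 0, so no
  // slidedown is needed -- matching extractelt_nxv8i64_2_exact_vlen.
  M1Extract B = decomposeExactVLEN(128, 64, 2);
  std::printf("i64 idx 2:  subreg %u, offset %u\n", B.SubRegIdx, B.RemIdx);
  return 0;
}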