diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6eb253cc51466..4dc3f6137e306 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -13803,9 +13804,17 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
     Align = std::min(Align, Ld->getAlign());
   }
 
-  using PtrDiff = std::pair<SDValue, bool>;
-  auto GetPtrDiff = [](LoadSDNode *Ld1,
-                       LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+  using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
+  auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
+                           LoadSDNode *Ld2) -> std::optional<PtrDiff> {
+    // If the load ptrs can be decomposed into a common (Base + Index) with a
+    // common constant stride, then return the constant stride.
+    BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
+    BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
+    if (BIO1.equalBaseIndex(BIO2, DAG))
+      return {{BIO2.getOffset() - BIO1.getOffset(), false}};
+
+    // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
     SDValue P1 = Ld1->getBasePtr();
     SDValue P2 = Ld2->getBasePtr();
     if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
@@ -13844,7 +13853,11 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
   if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
     return SDValue();
 
-  auto [Stride, MustNegateStride] = *BaseDiff;
+  auto [StrideVariant, MustNegateStride] = *BaseDiff;
+  SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
+                       ? std::get<SDValue>(StrideVariant)
+                       : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
+                                         Lds[0]->getOffset().getValueType());
   if (MustNegateStride)
     Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
 
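For readers following the first match in GetPtrDiff above: SelectionDAG's BaseIndexOffset decomposes each load address into a base node, an optional index node, and a constant byte offset. When two loads share the same base and index, the offset delta is a compile-time stride, even when neither pointer is an ISD::ADD of the other. Below is a minimal standalone sketch of that idea using toy types; it is not the real llvm::BaseIndexOffset API, whose match() walks actual DAG nodes.

#include <cstdint>
#include <optional>

// Toy model of the (Base + Index + Offset) decomposition used above.
// Base and Index stand in for SDNode pointers; Offset is the constant
// byte displacement folded out of the address computation.
struct ToyBaseIndexOffset {
  const void *Base = nullptr;
  const void *Index = nullptr;
  int64_t Offset = 0;

  bool equalBaseIndex(const ToyBaseIndexOffset &O) const {
    return Base == O.Base && Index == O.Index;
  }
};

// Mirrors the first match in GetPtrDiff: if two decomposed addresses
// share Base and Index, their constant stride is the offset delta.
std::optional<int64_t> constantStride(const ToyBaseIndexOffset &A,
                                      const ToyBaseIndexOffset &B) {
  if (!A.equalBaseIndex(B))
    return std::nullopt;
  return B.Offset - A.Offset;
}

For example, the addresses %s+16 and %s+32 from the tests below decompose to the same base with offsets 16 and 32, so the sketch (like the combine) reports a constant stride of 16.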
diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
new file mode 100644
index 0000000000000..ff35043dbd7e7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+unaligned-vector-mem -target-abi=ilp32 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem -target-abi=lp64 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define void @constant_forward_stride(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_forward_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a2
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 16
+  %2 = getelementptr inbounds i8, ptr %s, i64 32
+  %3 = getelementptr inbounds i8, ptr %s, i64 48
+  %4 = load <2 x i8>, ptr %s, align 1
+  %5 = load <2 x i8>, ptr %1, align 1
+  %6 = load <2 x i8>, ptr %2, align 1
+  %7 = load <2 x i8>, ptr %3, align 1
+  %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i8> %10, ptr %d, align 1
+  ret void
+}
+
+define void @constant_forward_stride2(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_forward_stride2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, -48
+; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a2
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 -16
+  %2 = getelementptr inbounds i8, ptr %s, i64 -32
+  %3 = getelementptr inbounds i8, ptr %s, i64 -48
+  %4 = load <2 x i8>, ptr %3, align 1
+  %5 = load <2 x i8>, ptr %2, align 1
+  %6 = load <2 x i8>, ptr %1, align 1
+  %7 = load <2 x i8>, ptr %s, align 1
+  %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i8> %10, ptr %d, align 1
+  ret void
+}
+
+define void @constant_forward_stride3(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_forward_stride3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a2
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 16
+  %2 = getelementptr inbounds i8, ptr %s, i64 32
+  %3 = getelementptr inbounds i8, ptr %s, i64 48
+  %4 = getelementptr inbounds i8, ptr %1, i64 0
+  %5 = getelementptr inbounds i8, ptr %2, i64 0
+  %6 = getelementptr inbounds i8, ptr %3, i64 0
+  %7 = load <2 x i8>, ptr %s, align 1
+  %8 = load <2 x i8>, ptr %4, align 1
+  %9 = load <2 x i8>, ptr %5, align 1
+  %10 = load <2 x i8>, ptr %6, align 1
+  %11 = shufflevector <2 x i8> %7, <2 x i8> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %12 = shufflevector <2 x i8> %9, <2 x i8> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %13 = shufflevector <4 x i8> %11, <4 x i8> %12, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i8> %13, ptr %d, align 1
+  ret void
+}
+
+define void @constant_back_stride(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_back_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, -16
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a2
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 -16
+  %2 = getelementptr inbounds i8, ptr %s, i64 -32
+  %3 = getelementptr inbounds i8, ptr %s, i64 -48
+  %4 = load <2 x i8>, ptr %s, align 1
+  %5 = load <2 x i8>, ptr %1, align 1
+  %6 = load <2 x i8>, ptr %2, align 1
+  %7 = load <2 x i8>, ptr %3, align 1
+  %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i8> %10, ptr %d, align 1
+  ret void
+}
+
+define void @constant_back_stride2(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_back_stride2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, a0, 48
+; CHECK-NEXT:    li a2, -16
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a2
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 16
+  %2 = getelementptr inbounds i8, ptr %s, i64 32
+  %3 = getelementptr inbounds i8, ptr %s, i64 48
+  %4 = load <2 x i8>, ptr %3, align 1
+  %5 = load <2 x i8>, ptr %2, align 1
+  %6 = load <2 x i8>, ptr %1, align 1
+  %7 = load <2 x i8>, ptr %s, align 1
+  %8 = shufflevector <2 x i8> %4, <2 x i8> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %9 = shufflevector <2 x i8> %6, <2 x i8> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %10 = shufflevector <4 x i8> %8, <4 x i8> %9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i8> %10, ptr %d, align 1
+  ret void
+}
+
+define void @constant_back_stride3(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_back_stride3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, -16
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a2
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 -16
+  %2 = getelementptr inbounds i8, ptr %s, i64 -32
+  %3 = getelementptr inbounds i8, ptr %s, i64 -48
+  %4 = getelementptr inbounds i8, ptr %1, i64 0
+  %5 = getelementptr inbounds i8, ptr %2, i64 0
+  %6 = getelementptr inbounds i8, ptr %3, i64 0
+  %7 = load <2 x i8>, ptr %s, align 1
+  %8 = load <2 x i8>, ptr %4, align 1
+  %9 = load <2 x i8>, ptr %5, align 1
+  %10 = load <2 x i8>, ptr %6, align 1
+  %11 = shufflevector <2 x i8> %7, <2 x i8> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %12 = shufflevector <2 x i8> %9, <2 x i8> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %13 = shufflevector <4 x i8> %11, <4 x i8> %12, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <8 x i8> %13, ptr %d, align 1
+  ret void
+}
+
+define void @constant_zero_stride(ptr %s, ptr %d) {
+; CHECK-LABEL: constant_zero_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vslideup.vi v9, v8, 2
+; CHECK-NEXT:    vse8.v v9, (a1)
+; CHECK-NEXT:    ret
+  %1 = getelementptr inbounds i8, ptr %s, i64 0
+  %2 = load <2 x i8>, ptr %s, align 1
+  %3 = load <2 x i8>, ptr %1, align 1
+  %4 = shufflevector <2 x i8> %2, <2 x i8> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  store <4 x i8> %4, ptr %d, align 1
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
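A note on the stride materialization in the RISCVISelLowering.cpp hunk above: every test in this file exercises the new int64_t arm of the variant, which is folded into a constant DAG node, while the pre-existing SDValue arm covers strides only known at runtime. The following self-contained sketch, with a plain string standing in for SDValue, shows the same holds_alternative/get dispatch; the names here are illustrative only, not LLVM API.

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>

// Stand-in for the PtrDiff stride above: either a compile-time constant
// (int64_t) or an opaque runtime value (a string here, SDValue in the patch).
using StrideVariant = std::variant<int64_t, std::string>;

// Mirrors the ternary plus MustNegateStride handling in the combine:
// reuse a runtime stride directly, otherwise "materialize" the constant.
std::string materializeStride(const StrideVariant &S, bool MustNegate) {
  if (std::holds_alternative<std::string>(S))
    return (MustNegate ? "neg " : "") + std::get<std::string>(S);
  int64_t C = std::get<int64_t>(S);
  return std::to_string(MustNegate ? -C : C);
}

int main() {
  std::cout << materializeStride(int64_t{16}, false) << '\n';      // "16"
  std::cout << materializeStride(int64_t{16}, true) << '\n';       // "-16"
  std::cout << materializeStride(std::string{"%x"}, true) << '\n'; // "neg %x"
}

Deferring DAG.getConstant until after the isLegalStridedLoadStore check, as the patch does, also avoids creating a constant node for combines that bail out.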