diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 9b9e870fb61d9..72f96965ae985 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -291,9 +291,6 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, .setMemRefs(MMOLo); if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { - // FIXME: Zdinx RV32 can not work on unaligned scalar memory. - assert(!STI->enableUnalignedScalarMem()); - assert(MBBI->getOperand(2).getOffset() % 8 == 0); MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4); BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) @@ -344,7 +341,7 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB, if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { auto Offset = MBBI->getOperand(2).getOffset(); - assert(MBBI->getOperand(2).getOffset() % 8 == 0); + assert(Offset % 8 == 0); MBBI->getOperand(2).setOffset(Offset + 4); BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi) .addReg(MBBI->getOperand(1).getReg()) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 58f8dc4970282..48fe788a81dff 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2522,7 +2522,8 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, - bool IsPrefetch = false) { + bool IsPrefetch = false, + bool IsRV32Zdinx = false) { if (!isa<ConstantSDNode>(Addr)) return false; @@ -2536,6 +2537,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, if (!Subtarget->is64Bit() || isInt<32>(Hi)) { if (IsPrefetch && (Lo12 & 0b11111) != 0) return false; + if (IsRV32Zdinx && !isInt<12>(Lo12 + 4)) + return false; 
if (Hi) { int64_t Hi20 = (Hi >> 12) & 0xfffff; @@ -2560,6 +2563,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, Lo12 = Seq.back().getImm(); if (IsPrefetch && (Lo12 & 0b11111) != 0) return false; + if (IsRV32Zdinx && !isInt<12>(Lo12 + 4)) + return false; // Drop the last instruction. Seq.pop_back(); @@ -2649,7 +2654,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, } bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, - SDValue &Offset, bool IsINX) { + SDValue &Offset, bool IsRV32Zdinx) { if (SelectAddrFrameIndex(Addr, Base, Offset)) return true; @@ -2657,12 +2662,36 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, MVT VT = Addr.getSimpleValueType(); if (Addr.getOpcode() == RISCVISD::ADD_LO) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; + // If this is non RV32Zdinx we can always fold. + if (!IsRV32Zdinx) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4 + // to the offset when we expand in RISCVExpandPseudoInsts. + if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) { + const DataLayout &DL = CurDAG->getDataLayout(); + Align Alignment = commonAlignment( + GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); + if (Alignment > 4) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + } + if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) { + Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); + if (Alignment > 4) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + } } - int64_t RV32ZdinxRange = IsINX ? 4 : 0; + int64_t RV32ZdinxRange = IsRV32Zdinx ? 
4 : 0; if (CurDAG->isBaseWithConstantOffset(Addr)) { int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) { @@ -2678,7 +2707,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, const DataLayout &DL = CurDAG->getDataLayout(); Align Alignment = commonAlignment( GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); - if (CVal == 0 || Alignment > CVal) { + if ((CVal == 0 || Alignment > CVal) && + (!IsRV32Zdinx || Alignment > (CVal + 4))) { int64_t CombinedOffset = CVal + GA->getOffset(); Base = Base.getOperand(0); Offset = CurDAG->getTargetGlobalAddress( @@ -2705,7 +2735,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use // an ADDI for part of the offset and fold the rest into the load/store. // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. - if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { + if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) { int64_t Adj = CVal < 0 ? -2048 : 2047; Base = SDValue( CurDAG->getMachineNode( @@ -2724,7 +2754,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, // instructions. if (isWorthFoldingAdd(Addr) && selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, - Offset)) { + Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) { // Insert an ADD instruction with the materialized Hi52 bits. 
Base = SDValue( CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), @@ -2733,7 +2763,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, } } - if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) + if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, + /*IsPrefetch=*/false, RV32ZdinxRange)) return true; Base = Addr; @@ -2791,7 +2822,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, } if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, - Offset, true)) { + Offset, /*IsPrefetch=*/true)) { // Insert an ADD instruction with the materialized Hi52 bits. Base = SDValue( CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), @@ -2800,7 +2831,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, } } - if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true)) + if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, + /*IsPrefetch=*/true)) return true; Base = Addr; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 1b3b00eeccce8..6dfaee0bcf8d4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -48,8 +48,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset, - bool IsINX = false); - bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) { + bool IsRV32Zdinx = false); + bool SelectAddrRegImmRV32Zdinx(SDValue Addr, SDValue &Base, SDValue &Offset) { return SelectAddrRegImm(Addr, Base, Offset, true); } bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 57c18791cc43b..ed0ad27ac9d29 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -25,7 +25,7 @@ def SDT_RISCVSplitF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>; def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>; -def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">; +def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmRV32Zdinx">; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. diff --git a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll index f56d47716bd78..01ecaee3d7e7b 100644 --- a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll +++ b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs < %s \ ; RUN: -target-abi=ilp32 | FileCheck -check-prefix=RV32ZDINX %s +; RUN: llc -mtriple=riscv32 -mattr=+zdinx,+unaligned-scalar-mem -verify-machineinstrs < %s \ +; RUN: -target-abi=ilp32 | FileCheck -check-prefix=RV32ZDINXUALIGNED %s ; RUN: llc -mtriple=riscv64 -mattr=+zdinx -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64 | FileCheck -check-prefix=RV64ZDINX %s @@ -14,6 +16,15 @@ define void @foo(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; +; RV32ZDINXUALIGNED-LABEL: foo: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; ; RV64ZDINX-LABEL: foo: ; RV64ZDINX: # %bb.0: # %entry ; RV64ZDINX-NEXT: sd a1, 2044(a0) @@ -35,6 +46,16 @@ define void @foo2(ptr nocapture %p, double %d) 
nounwind { ; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; +; RV32ZDINXUALIGNED-LABEL: foo2: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a2 +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; ; RV64ZDINX-LABEL: foo2: ; RV64ZDINX: # %bb.0: # %entry ; RV64ZDINX-NEXT: fadd.d a1, a1, a1 @@ -60,6 +81,16 @@ define void @foo3(ptr nocapture %p) nounwind { ; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; +; RV32ZDINXUALIGNED-LABEL: foo3: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(d) +; RV32ZDINXUALIGNED-NEXT: lw a2, %lo(d)(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, %lo(d+4)(a1) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; ; RV64ZDINX-LABEL: foo3: ; RV64ZDINX: # %bb.0: # %entry ; RV64ZDINX-NEXT: lui a1, %hi(d) @@ -87,6 +118,19 @@ define void @foo4(ptr %p) nounwind { ; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; +; RV32ZDINXUALIGNED-LABEL: foo4: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16 +; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1) +; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp) +; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d) +; RV32ZDINXUALIGNED-NEXT: sw a2, %lo(d)(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, %lo(d+4)(a0) +; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16 +; RV32ZDINXUALIGNED-NEXT: ret +; ; RV64ZDINX-LABEL: foo4: ; RV64ZDINX: # %bb.0: # %entry ; RV64ZDINX-NEXT: addi sp, sp, -16 @@ -116,6 +160,15 @@ define void @foo5(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-NEXT: sw a3, 3(a0) ; RV32ZDINX-NEXT: ret ; +; RV32ZDINXUALIGNED-LABEL: foo5: +; RV32ZDINXUALIGNED: # %bb.0: # %entry 
+; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, -2048 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, -1(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 3(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; ; RV64ZDINX-LABEL: foo5: ; RV64ZDINX: # %bb.0: # %entry ; RV64ZDINX-NEXT: addi a0, a0, -2048 @@ -141,6 +194,19 @@ define void @foo6(ptr %p, double %d) nounwind { ; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; +; RV32ZDINXUALIGNED-LABEL: foo6: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: lui a3, %hi(.LCPI5_0) +; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a3) +; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a3) +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a4 +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; ; RV64ZDINX-LABEL: foo6: ; RV64ZDINX: # %bb.0: # %entry ; RV64ZDINX-NEXT: lui a2, %hi(.LCPI5_0) @@ -154,3 +220,276 @@ entry: store double %add, ptr %add.ptr, align 8 ret void } + +define void @foo7(ptr nocapture %p) nounwind { +; RV32ZDINX-LABEL: foo7: +; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: lui a1, %hi(d) +; RV32ZDINX-NEXT: lw a2, %lo(d+4)(a1) +; RV32ZDINX-NEXT: addi a1, a1, %lo(d) +; RV32ZDINX-NEXT: sw a2, 8(sp) +; RV32ZDINX-NEXT: lw a1, 8(a1) +; RV32ZDINX-NEXT: sw a1, 12(sp) +; RV32ZDINX-NEXT: lw a2, 8(sp) +; RV32ZDINX-NEXT: lw a3, 12(sp) +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) +; RV32ZDINX-NEXT: addi sp, sp, 16 +; RV32ZDINX-NEXT: ret +; +; RV32ZDINXUALIGNED-LABEL: foo7: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(d) +; RV32ZDINXUALIGNED-NEXT: addi a1, a1, %lo(d) +; RV32ZDINXUALIGNED-NEXT: lw a2, 4(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 8(a1) +; RV32ZDINXUALIGNED-NEXT: addi a0, 
a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; +; RV64ZDINX-LABEL: foo7: +; RV64ZDINX: # %bb.0: # %entry +; RV64ZDINX-NEXT: lui a1, %hi(d) +; RV64ZDINX-NEXT: addi a2, a1, %lo(d) +; RV64ZDINX-NEXT: lwu a2, 8(a2) +; RV64ZDINX-NEXT: lwu a1, %lo(d+4)(a1) +; RV64ZDINX-NEXT: slli a2, a2, 32 +; RV64ZDINX-NEXT: or a1, a2, a1 +; RV64ZDINX-NEXT: sd a1, 2044(a0) +; RV64ZDINX-NEXT: ret +entry: + %p2 = getelementptr inbounds i8, ptr @d, i32 4 + %0 = load double, ptr %p2, align 4 + %add.ptr = getelementptr inbounds i8, ptr %p, i64 2044 + store double %0, ptr %add.ptr, align 8 + ret void +} + +define void @foo8(ptr %p) nounwind { +; RV32ZDINX-LABEL: foo8: +; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: addi a1, a0, 2047 +; RV32ZDINX-NEXT: lw a2, -3(a1) +; RV32ZDINX-NEXT: lw a3, 1(a1) +; RV32ZDINX-NEXT: sw a0, 8(sp) +; RV32ZDINX-NEXT: sw a2, 0(sp) +; RV32ZDINX-NEXT: sw a3, 4(sp) +; RV32ZDINX-NEXT: lw a0, 4(sp) +; RV32ZDINX-NEXT: lui a1, %hi(d) +; RV32ZDINX-NEXT: addi a2, a1, %lo(d) +; RV32ZDINX-NEXT: sw a0, 8(a2) +; RV32ZDINX-NEXT: lw a0, 0(sp) +; RV32ZDINX-NEXT: sw a0, %lo(d+4)(a1) +; RV32ZDINX-NEXT: addi sp, sp, 16 +; RV32ZDINX-NEXT: ret +; +; RV32ZDINXUALIGNED-LABEL: foo8: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16 +; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1) +; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp) +; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(d) +; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 8(a0) +; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16 +; RV32ZDINXUALIGNED-NEXT: ret +; +; RV64ZDINX-LABEL: foo8: +; RV64ZDINX: # %bb.0: # %entry +; RV64ZDINX-NEXT: addi sp, sp, -16 +; RV64ZDINX-NEXT: ld a1, 2044(a0) +; RV64ZDINX-NEXT: sd a0, 8(sp) +; RV64ZDINX-NEXT: lui a0, %hi(d) +; 
RV64ZDINX-NEXT: addi a2, a0, %lo(d) +; RV64ZDINX-NEXT: sw a1, %lo(d+4)(a0) +; RV64ZDINX-NEXT: srli a1, a1, 32 +; RV64ZDINX-NEXT: sw a1, 8(a2) +; RV64ZDINX-NEXT: addi sp, sp, 16 +; RV64ZDINX-NEXT: ret +entry: + %p.addr = alloca ptr, align 8 + store ptr %p, ptr %p.addr, align 8 + %0 = load ptr, ptr %p.addr, align 8 + %add.ptr = getelementptr inbounds i8, ptr %0, i64 2044 + %1 = load double, ptr %add.ptr, align 8 + %p2 = getelementptr inbounds i8, ptr @d, i32 4 + store double %1, ptr %p2, align 4 + ret void +} + +@e = global double 4.2, align 4 + +define void @foo9(ptr nocapture %p) nounwind { +; RV32ZDINX-LABEL: foo9: +; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: lui a1, %hi(e) +; RV32ZDINX-NEXT: lw a2, %lo(e)(a1) +; RV32ZDINX-NEXT: sw a2, 8(sp) +; RV32ZDINX-NEXT: addi a1, a1, %lo(e) +; RV32ZDINX-NEXT: lw a1, 4(a1) +; RV32ZDINX-NEXT: sw a1, 12(sp) +; RV32ZDINX-NEXT: lw a2, 8(sp) +; RV32ZDINX-NEXT: lw a3, 12(sp) +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) +; RV32ZDINX-NEXT: addi sp, sp, 16 +; RV32ZDINX-NEXT: ret +; +; RV32ZDINXUALIGNED-LABEL: foo9: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(e) +; RV32ZDINXUALIGNED-NEXT: addi a1, a1, %lo(e) +; RV32ZDINXUALIGNED-NEXT: lw a2, 0(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 4(a1) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; +; RV64ZDINX-LABEL: foo9: +; RV64ZDINX: # %bb.0: # %entry +; RV64ZDINX-NEXT: lui a1, %hi(e) +; RV64ZDINX-NEXT: addi a2, a1, %lo(e) +; RV64ZDINX-NEXT: lwu a2, 4(a2) +; RV64ZDINX-NEXT: lwu a1, %lo(e)(a1) +; RV64ZDINX-NEXT: slli a2, a2, 32 +; RV64ZDINX-NEXT: or a1, a2, a1 +; RV64ZDINX-NEXT: sd a1, 2044(a0) +; RV64ZDINX-NEXT: ret +entry: + %0 = load double, ptr @e, align 4 + %add.ptr = getelementptr inbounds i8, ptr %p, i64 2044 + store double %0, ptr %add.ptr, align 8 
+ ret void +} + +define void @foo10(ptr %p) nounwind { +; RV32ZDINX-LABEL: foo10: +; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: addi a1, a0, 2047 +; RV32ZDINX-NEXT: lw a2, -3(a1) +; RV32ZDINX-NEXT: lw a3, 1(a1) +; RV32ZDINX-NEXT: sw a0, 8(sp) +; RV32ZDINX-NEXT: sw a2, 0(sp) +; RV32ZDINX-NEXT: sw a3, 4(sp) +; RV32ZDINX-NEXT: lw a0, 4(sp) +; RV32ZDINX-NEXT: lui a1, %hi(e) +; RV32ZDINX-NEXT: addi a2, a1, %lo(e) +; RV32ZDINX-NEXT: sw a0, 4(a2) +; RV32ZDINX-NEXT: lw a0, 0(sp) +; RV32ZDINX-NEXT: sw a0, %lo(e)(a1) +; RV32ZDINX-NEXT: addi sp, sp, 16 +; RV32ZDINX-NEXT: ret +; +; RV32ZDINXUALIGNED-LABEL: foo10: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16 +; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1) +; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp) +; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(e) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(e) +; RV32ZDINXUALIGNED-NEXT: sw a2, 0(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a0) +; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16 +; RV32ZDINXUALIGNED-NEXT: ret +; +; RV64ZDINX-LABEL: foo10: +; RV64ZDINX: # %bb.0: # %entry +; RV64ZDINX-NEXT: addi sp, sp, -16 +; RV64ZDINX-NEXT: ld a1, 2044(a0) +; RV64ZDINX-NEXT: sd a0, 8(sp) +; RV64ZDINX-NEXT: lui a0, %hi(e) +; RV64ZDINX-NEXT: sw a1, %lo(e)(a0) +; RV64ZDINX-NEXT: addi a0, a0, %lo(e) +; RV64ZDINX-NEXT: srli a1, a1, 32 +; RV64ZDINX-NEXT: sw a1, 4(a0) +; RV64ZDINX-NEXT: addi sp, sp, 16 +; RV64ZDINX-NEXT: ret +entry: + %p.addr = alloca ptr, align 8 + store ptr %p, ptr %p.addr, align 8 + %0 = load ptr, ptr %p.addr, align 8 + %add.ptr = getelementptr inbounds i8, ptr %0, i64 2044 + %1 = load double, ptr %add.ptr, align 8 + store double %1, ptr @e, align 4 + ret void +} + +define void @foo11(ptr nocapture %p, double %d) nounwind { +; RV32ZDINX-LABEL: foo11: +; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: mv a3, a2 +; RV32ZDINX-NEXT: lui a2, 1 +; 
RV32ZDINX-NEXT: add a0, a0, a2 +; RV32ZDINX-NEXT: mv a2, a1 +; RV32ZDINX-NEXT: sw a2, -4(a0) +; RV32ZDINX-NEXT: sw a3, 0(a0) +; RV32ZDINX-NEXT: ret +; +; RV32ZDINXUALIGNED-LABEL: foo11: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: lui a2, 1 +; RV32ZDINXUALIGNED-NEXT: add a0, a0, a2 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, -4(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 0(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; +; RV64ZDINX-LABEL: foo11: +; RV64ZDINX: # %bb.0: # %entry +; RV64ZDINX-NEXT: addi a0, a0, 2047 +; RV64ZDINX-NEXT: sd a1, 2045(a0) +; RV64ZDINX-NEXT: ret +entry: + %add.ptr = getelementptr inbounds i8, ptr %p, i64 4092 + store double %d, ptr %add.ptr, align 8 + ret void +} + +define void @foo12(ptr nocapture %p, double %d) nounwind { +; RV32ZDINX-LABEL: foo12: +; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: mv a3, a2 +; RV32ZDINX-NEXT: lui a2, 2 +; RV32ZDINX-NEXT: addi a2, a2, 2047 +; RV32ZDINX-NEXT: add a0, a0, a2 +; RV32ZDINX-NEXT: mv a2, a1 +; RV32ZDINX-NEXT: sw a2, 0(a0) +; RV32ZDINX-NEXT: sw a3, 4(a0) +; RV32ZDINX-NEXT: ret +; +; RV32ZDINXUALIGNED-LABEL: foo12: +; RV32ZDINXUALIGNED: # %bb.0: # %entry +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: lui a2, 2 +; RV32ZDINXUALIGNED-NEXT: addi a2, a2, 2047 +; RV32ZDINXUALIGNED-NEXT: add a0, a0, a2 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, 0(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a0) +; RV32ZDINXUALIGNED-NEXT: ret +; +; RV64ZDINX-LABEL: foo12: +; RV64ZDINX: # %bb.0: # %entry +; RV64ZDINX-NEXT: lui a2, 2 +; RV64ZDINX-NEXT: add a0, a0, a2 +; RV64ZDINX-NEXT: sd a1, 2047(a0) +; RV64ZDINX-NEXT: ret +entry: + %add.ptr = getelementptr inbounds i8, ptr %p, i64 10239 + store double %d, ptr %add.ptr, align 8 + ret void +}