From d3db27e843b610c0397a119a89b3867f6a0b2a30 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Thu, 28 Nov 2024 00:29:21 +0300 Subject: [PATCH 1/7] [Xtensa] Implement variable arguments support. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 233 +++++++++++++++++- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 9 + .../Target/Xtensa/XtensaMachineFunctionInfo.h | 16 +- llvm/test/CodeGen/Xtensa/vararg.ll | 87 +++++++ 4 files changed, 338 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/vararg.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 5450222a7b2e1..d04c82e533d5a 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -14,6 +14,7 @@ #include "XtensaISelLowering.h" #include "XtensaConstantPoolValue.h" #include "XtensaInstrInfo.h" +#include "XtensaMachineFunctionInfo.h" #include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -133,6 +134,14 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + // VASTART and VACOPY need to deal with the Xtensa-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // we use special va_list structure so we have to customize this + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Compute derived properties from the register classes computeRegisterProperties(STI.getRegisterInfo()); } @@ -211,6 +220,11 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint( TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +unsigned XtensaTargetLowering::getVaListSizeInBits(const DataLayout &DL) const { + // 2 * sizeof(int*) + sizeof(int) + return 3 * 4; +} + //===----------------------------------------------------------------------===// // Calling conventions //===----------------------------------------------------------------------===// @@ -304,13 +318,14 @@ SDValue XtensaTargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); + + XtensaFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector OutChains; - if (IsVarArg) - report_fatal_error("Var arg not supported by FormalArguments Lowering"); - // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, @@ -378,6 +393,68 @@ SDValue XtensaTargetLowering::LowerFormalArguments( } } + if (IsVarArg) { + static const MCPhysReg XtensaArgRegs[6] = { + Xtensa::A2, Xtensa::A3, Xtensa::A4, Xtensa::A5, Xtensa::A6, Xtensa::A7}; + ArrayRef ArgRegs = ArrayRef(XtensaArgRegs); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + const TargetRegisterClass *RC = &Xtensa::ARRegClass; + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned RegSize = 4; + MVT RegTy = MVT::getIntegerVT(RegSize * 8); + + XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register + + XtensaFI->setVarArgsStackOffset(MFI.CreateFixedObject( + PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true)); + + // Offset of the first variable argument from stack pointer, and size of + // the vararg save area. For now, the varargs save area is either zero or + // large enough to hold a0-a7. + int VaArgOffset, VarArgsSaveSize; + + // If all registers are allocated, then all varargs must be passed on the + // stack and we don't need to save any argregs. + if (ArgRegs.size() == Idx) { + VaArgOffset = CCInfo.getStackSize(); + VarArgsSaveSize = 0; + } else { + VarArgsSaveSize = RegSize * (ArgRegs.size() - Idx); + VaArgOffset = -VarArgsSaveSize; + } + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + XtensaFI->setVarArgsFrameIndex(FI); + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) { + const unsigned Reg = RegInfo.createVirtualRegister(RC); + unsigned FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); + + // Argument passed in FrameReg we save in A8 (in emitPrologue), + // so load argument from A8 + if (ArgRegs[I] == FrameReg) { + RegInfo.addLiveIn(Xtensa::A8, Reg); + } else { + RegInfo.addLiveIn(ArgRegs[I], Reg); + } + + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo::getFixedStack(MF, FI)); + cast(Store.getNode()) + ->getMemOperand() + ->setValue((Value *)nullptr); + OutChains.push_back(Store); + } + } + // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { @@ -579,9 +656,6 @@ XtensaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - if (IsVarArg) - report_fatal_error("VarArg not supported"); - MachineFunction &MF = DAG.getMachineFunction(); // Assign locations to each returned value. @@ -859,6 +933,147 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops, DL); } +SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + EVT PtrVT = Addr.getValueType(); + SDLoc DL(Op); + + // Struct va_list_tag + // int32 *va_stk - points to the arguments passed in memory + // int32 *va_reg - points to the registers with arguments saved in memory + // int32 va_ndx - offset from va_stk or va_reg pointers which points to the + // next variable argument + + SDValue VAIndex; + SDValue StackOffsetFI = + DAG.getFrameIndex(XtensaFI->getVarArgsStackOffset(), PtrVT); + unsigned ArgWords = XtensaFI->getVarArgsFirstGPR() - 2; + + // If first variable argument passed in registers (maximum words in registers + // is 6) then set va_ndx to the position of this argument in registers area + // stored in memory (va_reg pointer). Otherwise va_ndx should point to the + // position of the first variable argument on stack (va_stk pointer). + if (ArgWords < 6) { + VAIndex = DAG.getConstant(ArgWords * 4, DL, MVT::i32); + } else { + VAIndex = DAG.getConstant(32, DL, MVT::i32); + } + + SDValue FrameIndex = + DAG.getFrameIndex(XtensaFI->getVarArgsFrameIndex(), PtrVT); + uint64_t FrameOffset = PtrVT.getStoreSize(); + const Value *SV = cast(Op.getOperand(2))->getValue(); + + // Store pointer to arguments given on stack (va_stk) + SDValue StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackOffsetFI, + DAG.getConstant(32, DL, PtrVT)); + SDValue StoreStackPtr = + DAG.getStore(Chain, DL, StackPtr, Addr, MachinePointerInfo(SV)); + + uint64_t NextOffset = FrameOffset; + SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr, + DAG.getConstant(NextOffset, DL, PtrVT)); + + // Store pointer to arguments given on registers (va_reg) + SDValue StoreRegPtr = DAG.getStore(StoreStackPtr, DL, FrameIndex, NextPtr, + MachinePointerInfo(SV, NextOffset)); + NextOffset += FrameOffset; + NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr, + DAG.getConstant(NextOffset, DL, PtrVT)); + + // Store third word : position in bytes of the first VA argument (va_ndx) + return DAG.getStore(StoreRegPtr, DL, VAIndex, NextPtr, + MachinePointerInfo(SV, NextOffset)); +} + +SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + unsigned VAListSize = getVaListSizeInBits(DAG.getDataLayout()) / 8; + return DAG.getMemcpy( + Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), Align(4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo()); +} + +SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + EVT PtrVT = Op.getValueType(); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + auto &TD = DAG.getDataLayout(); + Align ArgAlignment = TD.getPrefTypeAlign(VT.getTypeForEVT(*DAG.getContext())); + unsigned ArgAlignInBytes = ArgAlignment.value(); + unsigned ArgSizeInBytes = + TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); + unsigned VASizeInBytes = (ArgSizeInBytes + 3) & 0x3; + + // va_stk + SDValue VAStack = + DAG.getLoad(MVT::i32, DL, InChain, VAListPtr, MachinePointerInfo()); + InChain = VAStack.getValue(1); + + // va_reg + SDValue VARegPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAListPtr, + DAG.getConstant(4, DL, MVT::i32)); + SDValue VAReg = + DAG.getLoad(MVT::i32, DL, InChain, VARegPtr, MachinePointerInfo()); + InChain = VAReg.getValue(1); + + // va_ndx + SDValue VarArgIndexPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VARegPtr, + DAG.getConstant(4, DL, MVT::i32)); + SDValue VAIndex = + DAG.getLoad(MVT::i32, DL, InChain, VarArgIndexPtr, MachinePointerInfo()); + InChain = VAIndex.getValue(1); + + SDValue OrigIndex = VAIndex; + + if (ArgAlignInBytes > 4) { + OrigIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, + DAG.getConstant(ArgAlignInBytes - 1, DL, MVT::i32)); + OrigIndex = DAG.getNode(ISD::AND, DL, PtrVT, OrigIndex, + DAG.getConstant(-ArgAlignInBytes, DL, MVT::i32)); + } + + VAIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, + DAG.getConstant(VASizeInBytes, DL, MVT::i32)); + + SDValue CC = DAG.getSetCC(DL, MVT::i32, OrigIndex, + DAG.getConstant(6 * 4, DL, MVT::i32), ISD::SETLE); + + SDValue StkIndex = + DAG.getNode(ISD::ADD, DL, PtrVT, VAIndex, + DAG.getConstant(32 + VASizeInBytes, DL, MVT::i32)); + + CC = DAG.getSetCC(DL, MVT::i32, VAIndex, DAG.getConstant(6 * 4, DL, MVT::i32), + ISD::SETLE); + + SDValue Array = DAG.getNode(ISD::SELECT, DL, MVT::i32, CC, VAReg, VAStack); + + VAIndex = DAG.getNode(ISD::SELECT, DL, MVT::i32, CC, VAIndex, StkIndex); + + CC = DAG.getSetCC(DL, MVT::i32, VAIndex, DAG.getConstant(6 * 4, DL, MVT::i32), + ISD::SETLE); + + SDValue VAIndexStore = DAG.getStore(InChain, DL, VAIndex, VarArgIndexPtr, + MachinePointerInfo(SV)); + InChain = VAIndexStore; + + SDValue Addr = DAG.getNode(ISD::SUB, DL, PtrVT, VAIndex, + DAG.getConstant(VASizeInBytes, DL, MVT::i32)); + + Addr = DAG.getNode(ISD::ADD, DL, PtrVT, Array, Addr); + + return DAG.getLoad(VT, DL, InChain, Addr, MachinePointerInfo()); +} + SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -1001,6 +1216,12 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, return LowerFRAMEADDR(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index f1cd00c41437a..973b51cd73e4b 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -74,6 +74,9 @@ class XtensaTargetLowering : public TargetLowering { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + /// Returns the size of the platform's va_list object. + unsigned getVaListSizeInBits(const DataLayout &DL) const override; + const char *getTargetNodeName(unsigned Opcode) const override; std::pair @@ -148,6 +151,12 @@ class XtensaTargetLowering : public TargetLowering { SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h index c38c060b9387f..36fbd018bb8c9 100644 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -24,10 +24,14 @@ namespace llvm { class XtensaMachineFunctionInfo : public MachineFunctionInfo { /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. int BranchRelaxationScratchFrameIndex = -1; + unsigned VarArgsFirstGPR; + int VarArgsStackOffset; + unsigned VarArgsFrameIndex; public: explicit XtensaMachineFunctionInfo(const Function &F, - const TargetSubtargetInfo *STI) {} + const TargetSubtargetInfo *STI) + : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {} int getBranchRelaxationScratchFrameIndex() const { return BranchRelaxationScratchFrameIndex; @@ -35,6 +39,16 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { void setBranchRelaxationScratchFrameIndex(int Index) { BranchRelaxationScratchFrameIndex = Index; } + + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + int getVarArgsStackOffset() const { return VarArgsStackOffset; } + void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + + // Get and set the frame index of the first stack vararg. + unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } }; } // namespace llvm diff --git a/llvm/test/CodeGen/Xtensa/vararg.ll b/llvm/test/CodeGen/Xtensa/vararg.ll new file mode 100644 index 0000000000000..baf1bd34a3124 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/vararg.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s --mtriple=xtensa | FileCheck %s + +define void @test(...) { +; CHECK-LABEL: test: +; CHECK: .cfi_startproc +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: s32i a7, a1, 20 +; CHECK-NEXT: s32i a6, a1, 16 +; CHECK-NEXT: s32i a5, a1, 12 +; CHECK-NEXT: s32i a4, a1, 8 +; CHECK-NEXT: s32i a3, a1, 4 +; CHECK-NEXT: s32i a2, a1, 0 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + ret void +} + + +declare void @llvm.va_start(ptr) nounwind +declare void @llvm.va_end(ptr) nounwind +declare void @f(i32) nounwind +define void @test_vararg(...) nounwind { +; CHECK-LABEL: test_vararg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -48 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 12 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a13, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 36 +; CHECK-NEXT: s32i a6, a1, 32 +; CHECK-NEXT: s32i a5, a1, 28 +; CHECK-NEXT: s32i a4, a1, 24 +; CHECK-NEXT: s32i a3, a1, 20 +; CHECK-NEXT: s32i a2, a1, 16 +; CHECK-NEXT: movi a8, 0 +; CHECK-NEXT: s32i a8, a1, 8 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: s32i a8, a1, 4 +; CHECK-NEXT: addi a8, a1, 48 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: l32r a13, .LCPI1_0 +; CHECK-NEXT: j .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %for.cond +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: s32i a8, a1, 8 +; CHECK-NEXT: add a8, a8, a9 +; CHECK-NEXT: addi a8, a8, -3 +; CHECK-NEXT: l32i a2, a8, 0 +; CHECK-NEXT: callx0 a13 +; CHECK-NEXT: .LBB1_2: # %for.cond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: l32i a10, a1, 8 +; CHECK-NEXT: addi a8, a10, 3 +; CHECK-NEXT: blt a12, a8, .LBB1_4 +; CHECK-NEXT: # %bb.3: # %for.cond +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: bge a12, a8, .LBB1_1 +; CHECK-NEXT: j .LBB1_5 +; CHECK-NEXT: .LBB1_4: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: l32i a9, a1, 0 +; CHECK-NEXT: bge a12, a8, .LBB1_1 +; CHECK-NEXT: .LBB1_5: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: addi a8, a10, 38 +; CHECK-NEXT: j .LBB1_1 +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + br label %for.cond + +for.cond: + %0 = va_arg ptr %list, i32 + call void @f(i32 %0) + br label %for.cond + + call void @llvm.va_end(ptr %list) + ret void +} From 467e1d09a17f22e04af47dc3b2b9ee099d28965c Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Wed, 4 Dec 2024 22:23:28 +0300 Subject: [PATCH 2/7] [Xtensa] Minor fixes. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 85 +++++++------------ llvm/lib/Target/Xtensa/XtensaISelLowering.h | 3 - .../Target/Xtensa/XtensaMachineFunctionInfo.h | 8 +- 3 files changed, 35 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index d04c82e533d5a..ee5409431b973 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -134,10 +134,9 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); - // VASTART and VACOPY need to deal with the Xtensa-specific varargs + // VASTART, VAARG and VACOPY need to deal with the Xtensa-specific varargs // structure, but VAEND is a no-op. setOperationAction(ISD::VASTART, MVT::Other, Custom); - // we use special va_list structure so we have to customize this setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); @@ -220,23 +219,18 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint( TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -unsigned XtensaTargetLowering::getVaListSizeInBits(const DataLayout &DL) const { - // 2 * sizeof(int*) + sizeof(int) - return 3 * 4; -} - //===----------------------------------------------------------------------===// // Calling conventions //===----------------------------------------------------------------------===// #include "XtensaGenCallingConv.inc" +static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4, + Xtensa::A5, Xtensa::A6, Xtensa::A7}; + static bool CC_Xtensa_Custom(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const MCPhysReg IntRegs[] = {Xtensa::A2, Xtensa::A3, Xtensa::A4, - Xtensa::A5, Xtensa::A6, Xtensa::A7}; - if (ArgFlags.isByVal()) { Align ByValAlign = ArgFlags.getNonZeroByValAlign(); unsigned ByValSize = ArgFlags.getByValSize(); @@ -319,9 +313,6 @@ SDValue XtensaTargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); XtensaMachineFunctionInfo *XtensaFI = MF.getInfo(); - EVT PtrVT = getPointerTy(MF.getDataLayout()); - - XtensaFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector OutChains; @@ -338,16 +329,13 @@ SDValue XtensaTargetLowering::LowerFormalArguments( // Arguments stored on registers if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - const TargetRegisterClass *RC; - if (RegVT == MVT::i32) - RC = &Xtensa::ARRegClass; - else + if (RegVT != MVT::i32) report_fatal_error("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on // physical registers into virtual ones - unsigned Register = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Register = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -394,20 +382,18 @@ SDValue XtensaTargetLowering::LowerFormalArguments( } if (IsVarArg) { - static const MCPhysReg XtensaArgRegs[6] = { - Xtensa::A2, Xtensa::A3, Xtensa::A4, Xtensa::A5, Xtensa::A6, Xtensa::A7}; - ArrayRef ArgRegs = ArrayRef(XtensaArgRegs); + ArrayRef ArgRegs = ArrayRef(IntRegs); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &Xtensa::ARRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned RegSize = 4; - MVT RegTy = MVT::getIntegerVT(RegSize * 8); + MVT RegTy = MVT::i32; XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register - XtensaFI->setVarArgsStackOffset(MFI.CreateFixedObject( - PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true)); + XtensaFI->setVarArgsOnStackFrameIndex( + MFI.CreateFixedObject(4, CCInfo.getStackSize(), true)); // Offset of the first variable argument from stack pointer, and size of // the vararg save area. For now, the varargs save area is either zero or @@ -422,36 +408,26 @@ SDValue XtensaTargetLowering::LowerFormalArguments( } else { VarArgsSaveSize = RegSize * (ArgRegs.size() - Idx); VaArgOffset = -VarArgsSaveSize; - } - // Record the frame index of the first variable argument - // which is a value necessary to VASTART. - int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); - XtensaFI->setVarArgsFrameIndex(FI); - - // Copy the integer registers that may have been used for passing varargs - // to the vararg save area. - for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) { - const unsigned Reg = RegInfo.createVirtualRegister(RC); - unsigned FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF); - - // Argument passed in FrameReg we save in A8 (in emitPrologue), - // so load argument from A8 - if (ArgRegs[I] == FrameReg) { - RegInfo.addLiveIn(Xtensa::A8, Reg); - } else { + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + XtensaFI->setVarArgsInRegsFrameIndex(FI); + + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. + for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) { + const Register Reg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(ArgRegs[I], Reg); - } - SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); - FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, - MachinePointerInfo::getFixedStack(MF, FI)); - cast(Store.getNode()) - ->getMemOperand() - ->setValue((Value *)nullptr); - OutChains.push_back(Store); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); + SDValue PtrOff = + DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo::getFixedStack(MF, FI)); + OutChains.push_back(Store); + } } } @@ -950,7 +926,7 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, SDValue VAIndex; SDValue StackOffsetFI = - DAG.getFrameIndex(XtensaFI->getVarArgsStackOffset(), PtrVT); + DAG.getFrameIndex(XtensaFI->getVarArgsOnStackFrameIndex(), PtrVT); unsigned ArgWords = XtensaFI->getVarArgsFirstGPR() - 2; // If first variable argument passed in registers (maximum words in registers @@ -964,7 +940,7 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, } SDValue FrameIndex = - DAG.getFrameIndex(XtensaFI->getVarArgsFrameIndex(), PtrVT); + DAG.getFrameIndex(XtensaFI->getVarArgsInRegsFrameIndex(), PtrVT); uint64_t FrameOffset = PtrVT.getStoreSize(); const Value *SV = cast(Op.getOperand(2))->getValue(); @@ -991,7 +967,8 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, } SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { - unsigned VAListSize = getVaListSizeInBits(DAG.getDataLayout()) / 8; + // Size of the va_list_tag structure + constexpr unsigned VAListSize = 3 * 4; return DAG.getMemcpy( Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), Align(4), diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index 973b51cd73e4b..cebd7d2016c8e 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -74,9 +74,6 @@ class XtensaTargetLowering : public TargetLowering { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - /// Returns the size of the platform's va_list object. - unsigned getVaListSizeInBits(const DataLayout &DL) const override; - const char *getTargetNodeName(unsigned Opcode) const override; std::pair diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h index 36fbd018bb8c9..86b0dad4403a4 100644 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -43,12 +43,12 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } - int getVarArgsStackOffset() const { return VarArgsStackOffset; } - void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + int getVarArgsOnStackFrameIndex() const { return VarArgsStackOffset; } + void setVarArgsOnStackFrameIndex(int Offset) { VarArgsStackOffset = Offset; } // Get and set the frame index of the first stack vararg. - unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } - void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + unsigned getVarArgsInRegsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } }; } // namespace llvm From dfb7ee4ee9a66009f5b4cdfcad8337d0174fcbdf Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Thu, 5 Dec 2024 01:04:15 +0300 Subject: [PATCH 3/7] [Xtensa] Minor fixes. Fix variable names in XtensaMachineFunctionInfo class. Fix LowerFormalArguments function. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 16 +++++++------- .../Target/Xtensa/XtensaMachineFunctionInfo.h | 21 ++++++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index ee5409431b973..4f02acf980038 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -335,8 +335,8 @@ SDValue XtensaTargetLowering::LowerFormalArguments( // Transform the arguments stored on // physical registers into virtual ones - unsigned Register = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass); - SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Register, RegVT); + Register Reg = MF.addLiveIn(VA.getLocReg(), &Xtensa::ARRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then @@ -382,8 +382,8 @@ SDValue XtensaTargetLowering::LowerFormalArguments( } if (IsVarArg) { - ArrayRef ArgRegs = ArrayRef(IntRegs); - unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + unsigned Idx = CCInfo.getFirstUnallocated(IntRegs); + unsigned ArgRegsNum = std::size(IntRegs); const TargetRegisterClass *RC = &Xtensa::ARRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -402,11 +402,11 @@ SDValue XtensaTargetLowering::LowerFormalArguments( // If all registers are allocated, then all varargs must be passed on the // stack and we don't need to save any argregs. - if (ArgRegs.size() == Idx) { + if (ArgRegsNum == Idx) { VaArgOffset = CCInfo.getStackSize(); VarArgsSaveSize = 0; } else { - VarArgsSaveSize = RegSize * (ArgRegs.size() - Idx); + VarArgsSaveSize = RegSize * (ArgRegsNum - Idx); VaArgOffset = -VarArgsSaveSize; // Record the frame index of the first variable argument @@ -416,9 +416,9 @@ SDValue XtensaTargetLowering::LowerFormalArguments( // Copy the integer registers that may have been used for passing varargs // to the vararg save area. - for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSize) { + for (unsigned I = Idx; I < ArgRegsNum; ++I, VaArgOffset += RegSize) { const Register Reg = RegInfo.createVirtualRegister(RC); - RegInfo.addLiveIn(ArgRegs[I], Reg); + RegInfo.addLiveIn(IntRegs[I], Reg); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h index 86b0dad4403a4..f7f6922bec041 100644 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -25,13 +25,14 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. int BranchRelaxationScratchFrameIndex = -1; unsigned VarArgsFirstGPR; - int VarArgsStackOffset; - unsigned VarArgsFrameIndex; + unsigned VarArgsOnStackFrameIndex; + unsigned VarArgsInRegsFrameIndex; public: explicit XtensaMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) - : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {} + : VarArgsFirstGPR(0), VarArgsOnStackFrameIndex(0), + VarArgsInRegsFrameIndex(0) {} int getBranchRelaxationScratchFrameIndex() const { return BranchRelaxationScratchFrameIndex; @@ -43,12 +44,18 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } - int getVarArgsOnStackFrameIndex() const { return VarArgsStackOffset; } - void setVarArgsOnStackFrameIndex(int Offset) { VarArgsStackOffset = Offset; } + unsigned getVarArgsOnStackFrameIndex() const { + return VarArgsOnStackFrameIndex; + } + void setVarArgsOnStackFrameIndex(unsigned FI) { + VarArgsOnStackFrameIndex = FI; + } // Get and set the frame index of the first stack vararg. - unsigned getVarArgsInRegsFrameIndex() const { return VarArgsFrameIndex; } - void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + unsigned getVarArgsInRegsFrameIndex() const { + return VarArgsInRegsFrameIndex; + } + void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsInRegsFrameIndex = FI; } }; } // namespace llvm From 3998c2f8caf2ea4f1d4eb1cbed6eb79e6fcdcfb3 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Thu, 5 Dec 2024 01:19:50 +0300 Subject: [PATCH 4/7] [Xtensa] Fix XtensaMachineFunctionInfo. --- .../Target/Xtensa/XtensaMachineFunctionInfo.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h index f7f6922bec041..c430562091ba7 100644 --- a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h +++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h @@ -25,8 +25,8 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { /// FrameIndex of the spill slot for the scratch register in BranchRelaxation. int BranchRelaxationScratchFrameIndex = -1; unsigned VarArgsFirstGPR; - unsigned VarArgsOnStackFrameIndex; - unsigned VarArgsInRegsFrameIndex; + int VarArgsOnStackFrameIndex; + int VarArgsInRegsFrameIndex; public: explicit XtensaMachineFunctionInfo(const Function &F, @@ -44,18 +44,12 @@ class XtensaMachineFunctionInfo : public MachineFunctionInfo { unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } - unsigned getVarArgsOnStackFrameIndex() const { - return VarArgsOnStackFrameIndex; - } - void setVarArgsOnStackFrameIndex(unsigned FI) { - VarArgsOnStackFrameIndex = FI; - } + int getVarArgsOnStackFrameIndex() const { return VarArgsOnStackFrameIndex; } + void setVarArgsOnStackFrameIndex(int FI) { VarArgsOnStackFrameIndex = FI; } // Get and set the frame index of the first stack vararg. - unsigned getVarArgsInRegsFrameIndex() const { - return VarArgsInRegsFrameIndex; - } - void setVarArgsInRegsFrameIndex(unsigned FI) { VarArgsInRegsFrameIndex = FI; } + int getVarArgsInRegsFrameIndex() const { return VarArgsInRegsFrameIndex; } + void setVarArgsInRegsFrameIndex(int FI) { VarArgsInRegsFrameIndex = FI; } }; } // namespace llvm From 862f1de8b894577cd760cc5422040b0c9c0f1b1d Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Fri, 6 Dec 2024 16:35:40 +0300 Subject: [PATCH 5/7] [Xtensa] Minor fixes in lowering VASTART/VAARG. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 4f02acf980038..6a2c7de306293 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -947,19 +947,19 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, // Store pointer to arguments given on stack (va_stk) SDValue StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackOffsetFI, DAG.getConstant(32, DL, PtrVT)); + SDValue StoreStackPtr = DAG.getStore(Chain, DL, StackPtr, Addr, MachinePointerInfo(SV)); uint64_t NextOffset = FrameOffset; - SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr, - DAG.getConstant(NextOffset, DL, PtrVT)); + SDValue NextPtr = + DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset)); // Store pointer to arguments given on registers (va_reg) SDValue StoreRegPtr = DAG.getStore(StoreStackPtr, DL, FrameIndex, NextPtr, MachinePointerInfo(SV, NextOffset)); NextOffset += FrameOffset; - NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr, - DAG.getConstant(NextOffset, DL, PtrVT)); + NextPtr = DAG.getObjectPtrOffset(DL, Addr, TypeSize::getFixed(NextOffset)); // Store third word : position in bytes of the first VA argument (va_ndx) return DAG.getStore(StoreRegPtr, DL, VAIndex, NextPtr, @@ -997,15 +997,15 @@ SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { InChain = VAStack.getValue(1); // va_reg - SDValue VARegPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAListPtr, - DAG.getConstant(4, DL, MVT::i32)); + SDValue VARegPtr = + DAG.getObjectPtrOffset(DL, VAListPtr, TypeSize::getFixed(4)); SDValue VAReg = DAG.getLoad(MVT::i32, DL, InChain, VARegPtr, MachinePointerInfo()); InChain = VAReg.getValue(1); // va_ndx - SDValue VarArgIndexPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VARegPtr, - DAG.getConstant(4, DL, MVT::i32)); + SDValue VarArgIndexPtr = + DAG.getObjectPtrOffset(DL, VARegPtr, TypeSize::getFixed(4)); SDValue VAIndex = DAG.getLoad(MVT::i32, DL, InChain, VarArgIndexPtr, MachinePointerInfo()); InChain = VAIndex.getValue(1); From ae7bff062a4eb45b6179e571ced800251f3a6671 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Tue, 10 Dec 2024 02:50:44 +0300 Subject: [PATCH 6/7] [Xtensa] Fix lowering VACOPY/VAARG. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 25 +- llvm/test/CodeGen/Xtensa/vararg.ll | 537 ++++++++++++++++-- 2 files changed, 507 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 6a2c7de306293..45bde4a180c1e 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -969,27 +969,34 @@ SDValue XtensaTargetLowering::LowerVASTART(SDValue Op, SDValue XtensaTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // Size of the va_list_tag structure constexpr unsigned VAListSize = 3 * 4; - return DAG.getMemcpy( - Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), Align(4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo()); + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast(Op.getOperand(4))->getValue(); + SDLoc DL(Op); + + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, + DAG.getConstant(VAListSize, SDLoc(Op), MVT::i32), + Align(4), /*isVolatile*/ false, /*AlwaysInline*/ true, + /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), + MachinePointerInfo(SrcSV)); } SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = Op.getValueType(); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc DL(Node); auto &TD = DAG.getDataLayout(); - Align ArgAlignment = TD.getPrefTypeAlign(VT.getTypeForEVT(*DAG.getContext())); + Align ArgAlignment = TD.getABITypeAlign(Ty); unsigned ArgAlignInBytes = ArgAlignment.value(); - unsigned ArgSizeInBytes = - TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); - unsigned VASizeInBytes = (ArgSizeInBytes + 3) & 0x3; + unsigned ArgSizeInBytes = TD.getTypeAllocSize(Ty); + unsigned VASizeInBytes = llvm::alignTo(ArgSizeInBytes, 4); // va_stk SDValue VAStack = diff --git a/llvm/test/CodeGen/Xtensa/vararg.ll b/llvm/test/CodeGen/Xtensa/vararg.ll index baf1bd34a3124..d85752e11fa6b 100644 --- a/llvm/test/CodeGen/Xtensa/vararg.ll +++ b/llvm/test/CodeGen/Xtensa/vararg.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s --mtriple=xtensa | FileCheck %s -define void @test(...) { -; CHECK-LABEL: test: +define void @vararg(...) { +; CHECK-LABEL: vararg: ; CHECK: .cfi_startproc ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: addi a8, a1, -32 @@ -21,66 +21,511 @@ entry: ret void } - declare void @llvm.va_start(ptr) nounwind declare void @llvm.va_end(ptr) nounwind -declare void @f(i32) nounwind -define void @test_vararg(...) nounwind { -; CHECK-LABEL: test_vararg: +declare void @f_i32(i32) nounwind +declare void @f_i64(i64) nounwind + +define void @vararg_fixed_0(...) nounwind { +; CHECK-LABEL: vararg_fixed_0: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi a8, a1, -48 ; CHECK-NEXT: or a1, a8, a8 -; CHECK-NEXT: s32i a0, a1, 12 # 4-byte Folded Spill -; CHECK-NEXT: s32i a12, a1, 8 # 4-byte Folded Spill -; CHECK-NEXT: s32i a13, a1, 4 # 4-byte Folded Spill -; CHECK-NEXT: s32i a7, a1, 36 -; CHECK-NEXT: s32i a6, a1, 32 -; CHECK-NEXT: s32i a5, a1, 28 -; CHECK-NEXT: s32i a4, a1, 24 -; CHECK-NEXT: s32i a3, a1, 20 -; CHECK-NEXT: s32i a2, a1, 16 -; CHECK-NEXT: movi a8, 0 -; CHECK-NEXT: s32i a8, a1, 8 -; CHECK-NEXT: addi a8, a1, 16 -; CHECK-NEXT: s32i a8, a1, 4 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 32 +; CHECK-NEXT: s32i a6, a1, 28 +; CHECK-NEXT: s32i a5, a1, 24 +; CHECK-NEXT: s32i a4, a1, 20 +; CHECK-NEXT: s32i a3, a1, 16 +; CHECK-NEXT: s32i a2, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 ; CHECK-NEXT: addi a8, a1, 48 ; CHECK-NEXT: addi a8, a8, -32 ; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 4 ; CHECK-NEXT: movi a12, 24 -; CHECK-NEXT: l32r a13, .LCPI1_0 -; CHECK-NEXT: j .LBB1_2 -; CHECK-NEXT: .LBB1_1: # %for.cond -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: s32i a8, a1, 8 -; CHECK-NEXT: add a8, a8, a9 -; CHECK-NEXT: addi a8, a8, -3 +; CHECK-NEXT: blt a12, a9, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 40 +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 ; CHECK-NEXT: l32i a2, a8, 0 -; CHECK-NEXT: callx0 a13 -; CHECK-NEXT: .LBB1_2: # %for.cond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: l32i a10, a1, 8 -; CHECK-NEXT: addi a8, a10, 3 -; CHECK-NEXT: blt a12, a8, .LBB1_4 -; CHECK-NEXT: # %bb.3: # %for.cond -; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: l32r a8, .LCPI1_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 ; CHECK-NEXT: l32i a9, a1, 4 -; CHECK-NEXT: bge a12, a8, .LBB1_1 -; CHECK-NEXT: j .LBB1_5 -; CHECK-NEXT: .LBB1_4: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: l32i a9, a1, 0 -; CHECK-NEXT: bge a12, a8, .LBB1_1 -; CHECK-NEXT: .LBB1_5: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: addi a8, a10, 38 -; CHECK-NEXT: j .LBB1_1 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB1_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB1_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB1_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB1_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB1_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB1_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB1_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB1_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI1_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 48 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret entry: %list = alloca ptr, align 4 call void @llvm.va_start(ptr %list) - br label %for.cond -for.cond: %0 = va_arg ptr %list, i32 - call void @f(i32 %0) - br label %for.cond + call void @f_i32(i32 %0) + %1 = va_arg ptr %list, i64 + call void @f_i64(i64 %1) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_1(i32 %a1, ...) nounwind { +; CHECK-LABEL: vararg_fixed_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 28 +; CHECK-NEXT: s32i a6, a1, 24 +; CHECK-NEXT: s32i a5, a1, 20 +; CHECK-NEXT: s32i a4, a1, 16 +; CHECK-NEXT: s32i a3, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 8 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 44 +; CHECK-NEXT: .LBB2_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI2_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB2_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB2_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB2_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB2_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB2_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB2_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB2_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB2_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI2_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, ...) nounwind { +; CHECK-LABEL: vararg_fixed_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 16 +; CHECK-NEXT: s32i a6, a1, 12 +; CHECK-NEXT: addi a10, a1, 12 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 20 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB3_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB3_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 56 +; CHECK-NEXT: .LBB3_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI3_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB3_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB3_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB3_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB3_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB3_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB3_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB3_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB3_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI3_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 32 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_5(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind { +; CHECK-LABEL: vararg_fixed_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: s32i a7, a1, 12 +; CHECK-NEXT: addi a9, a1, 12 +; CHECK-NEXT: s32i a9, a1, 4 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a12, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB4_2: # %entry +; CHECK-NEXT: blt a12, a12, .LBB4_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: or a9, a12, a12 +; CHECK-NEXT: j .LBB4_5 +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: movi a9, 60 +; CHECK-NEXT: .LBB4_5: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI4_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB4_7 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB4_7: # %entry +; CHECK-NEXT: bge a12, a10, .LBB4_9 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB4_9: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB4_11 +; CHECK-NEXT: # %bb.10: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB4_11: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB4_13 +; CHECK-NEXT: # %bb.12: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB4_13: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI4_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_6(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, ...) nounwind { +; CHECK-LABEL: vararg_fixed_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: addi a10, a1, 0 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 36 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB5_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB5_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB5_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 72 +; CHECK-NEXT: .LBB5_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI5_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB5_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB5_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB5_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB5_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB5_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB5_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB5_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB5_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI5_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) + + call void @llvm.va_end(ptr %list) + ret void +} + +define void @vararg_fixed_7(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, ...) nounwind { +; CHECK-LABEL: vararg_fixed_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a8, a1, -16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill +; CHECK-NEXT: s32i a12, a1, 4 # 4-byte Folded Spill +; CHECK-NEXT: addi a10, a1, 0 +; CHECK-NEXT: s32i a10, a1, 4 +; CHECK-NEXT: addi a8, a1, 20 +; CHECK-NEXT: addi a8, a8, -32 +; CHECK-NEXT: s32i a8, a1, 0 +; CHECK-NEXT: movi a9, 36 +; CHECK-NEXT: movi a12, 24 +; CHECK-NEXT: blt a12, a9, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: or a8, a10, a10 +; CHECK-NEXT: .LBB6_2: # %entry +; CHECK-NEXT: bge a12, a9, .LBB6_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: movi a9, 72 +; CHECK-NEXT: .LBB6_4: # %entry +; CHECK-NEXT: s32i a9, a1, 8 +; CHECK-NEXT: add a8, a9, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a8, a8, 0 +; CHECK-NEXT: add a2, a8, a2 +; CHECK-NEXT: l32r a8, .LCPI6_0 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a7, a1, 8 +; CHECK-NEXT: addi a10, a7, 4 +; CHECK-NEXT: l32i a9, a1, 4 +; CHECK-NEXT: l32i a8, a1, 0 +; CHECK-NEXT: or a11, a8, a8 +; CHECK-NEXT: blt a12, a10, .LBB6_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: or a11, a9, a9 +; CHECK-NEXT: .LBB6_6: # %entry +; CHECK-NEXT: bge a12, a10, .LBB6_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: addi a10, a7, 40 +; CHECK-NEXT: .LBB6_8: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a10, a10, a11 +; CHECK-NEXT: addi a7, a10, -4 +; CHECK-NEXT: l32i a11, a1, 8 +; CHECK-NEXT: addi a10, a11, 4 +; CHECK-NEXT: blt a12, a10, .LBB6_10 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: or a8, a9, a9 +; CHECK-NEXT: .LBB6_10: # %entry +; CHECK-NEXT: l32i a2, a7, 0 +; CHECK-NEXT: bge a12, a10, .LBB6_12 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: addi a10, a11, 40 +; CHECK-NEXT: .LBB6_12: # %entry +; CHECK-NEXT: s32i a10, a1, 8 +; CHECK-NEXT: add a8, a10, a8 +; CHECK-NEXT: addi a8, a8, -4 +; CHECK-NEXT: l32i a3, a8, 0 +; CHECK-NEXT: l32r a8, .LCPI6_1 +; CHECK-NEXT: callx0 a8 +; CHECK-NEXT: l32i a12, a1, 4 # 4-byte Folded Reload +; CHECK-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload +; CHECK-NEXT: addi a8, a1, 16 +; CHECK-NEXT: or a1, a8, a8 +; CHECK-NEXT: ret +entry: + %list = alloca ptr, align 4 + call void @llvm.va_start(ptr %list) + + %va32 = va_arg ptr %list, i32 + %sum = add nsw i32 %va32, %a1 + call void @f_i32(i32 %sum) + + %va64 = va_arg ptr %list, i64 + call void @f_i64(i64 %va64) call void @llvm.va_end(ptr %list) ret void From 90eaaa4a77fabf8f3e6c346fb91ce320301fa7a4 Mon Sep 17 00:00:00 2001 From: Andrei Safronov Date: Wed, 11 Dec 2024 21:56:12 +0300 Subject: [PATCH 7/7] [Xtensa] Minor fixes. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 45bde4a180c1e..dc9e5f217599e 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -389,6 +389,7 @@ SDValue XtensaTargetLowering::LowerFormalArguments( MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned RegSize = 4; MVT RegTy = MVT::i32; + MVT FITy = getFrameIndexTy(DAG.getDataLayout()); XtensaFI->setVarArgsFirstGPR(Idx + 2); // 2 - number of a2 register @@ -422,8 +423,7 @@ SDValue XtensaTargetLowering::LowerFormalArguments( SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI.CreateFixedObject(RegSize, VaArgOffset, true); - SDValue PtrOff = - DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue PtrOff = DAG.getFrameIndex(FI, FITy); SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo::getFixedStack(MF, FI)); OutChains.push_back(Store); @@ -1022,8 +1022,9 @@ SDValue XtensaTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgAlignInBytes > 4) { OrigIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex, DAG.getConstant(ArgAlignInBytes - 1, DL, MVT::i32)); - OrigIndex = DAG.getNode(ISD::AND, DL, PtrVT, OrigIndex, - DAG.getConstant(-ArgAlignInBytes, DL, MVT::i32)); + OrigIndex = + DAG.getNode(ISD::AND, DL, PtrVT, OrigIndex, + DAG.getSignedConstant(-ArgAlignInBytes, DL, MVT::i32)); } VAIndex = DAG.getNode(ISD::ADD, DL, PtrVT, OrigIndex,