diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fddde668b7f1a..7725fa4f1ccb1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -338,7 +338,7 @@ static bool requiresSaveVG(const MachineFunction &MF);
 // Conservatively, returns true if the function is likely to have an SVE vectors
 // on the stack. This function is safe to be called before callee-saves or
 // object offsets have been determined.
-static bool isLikelyToHaveSVEStack(MachineFunction &MF) {
+static bool isLikelyToHaveSVEStack(const MachineFunction &MF) {
   auto *AFI = MF.getInfo<AArch64FunctionInfo>();
   if (AFI->isSVECC())
     return true;
@@ -532,6 +532,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
 bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
 
   // Win64 EH requires a frame pointer if funclets are present, as the locals
   // are accessed off the frame pointer in both the parent function and the
@@ -545,6 +546,29 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
       MFI.hasStackMap() || MFI.hasPatchPoint() ||
       RegInfo->hasStackRealignment(MF))
     return true;
+
+  // If we:
+  //
+  // 1. Have streaming mode changes
+  // OR:
+  // 2. Have a streaming body with SVE stack objects
+  //
+  // Then the value of VG restored when unwinding to this function may not match
+  // the value of VG used to set up the stack.
+  //
+  // This is a problem as the CFA can be described with an expression of the
+  // form: CFA = SP + NumBytes + VG * NumScalableBytes.
+  //
+  // If the value of VG used in that expression does not match the value used to
+  // set up the stack, an incorrect address for the CFA will be computed, and
+  // unwinding will fail.
+  //
+  // We work around this issue by ensuring the frame-pointer can describe the
+  // CFA in either of these cases.
+  if (AFI.needsDwarfUnwindInfo(MF) &&
+      ((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
+       (!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
+    return true;
   // With large callframes around we may need to use FP to access the scavenging
   // emergency spillslot.
   //
@@ -663,10 +687,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  SMEAttrs Attrs = AFI->getSMEFnAttrs();
-  bool LocallyStreaming =
-      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   if (CSI.empty())
     return;
@@ -680,14 +700,6 @@ void AArch64FrameLowering::emitCalleeSavedGPRLocations(
     assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
     int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
-
-    // The location of VG will be emitted before each streaming-mode change in
-    // the function. Only locally-streaming functions require emitting the
-    // non-streaming VG location here.
- if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) || - (!LocallyStreaming && Info.getReg() == AArch64::VG)) - continue; - CFIBuilder.buildOffset(Info.getReg(), Offset); } } @@ -707,8 +719,16 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations( AArch64FunctionInfo &AFI = *MF.getInfo(); CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); + std::optional IncomingVGOffsetFromDefCFA; + if (requiresSaveVG(MF)) { + auto IncomingVG = *find_if( + reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; }); + IncomingVGOffsetFromDefCFA = + MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea(); + } + for (const auto &Info : CSI) { - if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)) + if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector) continue; // Not all unwinders may know about SVE registers, so assume the lowest @@ -722,7 +742,8 @@ void AArch64FrameLowering::emitCalleeSavedSVELocations( StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI)); - CFIBuilder.insertCFIInst(createCFAOffset(TRI, Reg, Offset)); + CFIBuilder.insertCFIInst( + createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA)); } } @@ -783,9 +804,6 @@ static void emitCalleeSavedRestores(MachineBasicBlock &MBB, !static_cast(TRI).regNeedsCFI(Reg, Reg)) continue; - if (!Info.isRestored()) - continue; - CFIBuilder.buildRestore(Info.getReg()); } } @@ -1465,10 +1483,10 @@ bool requiresGetVGCall(MachineFunction &MF) { static bool requiresSaveVG(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); + if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges()) + return false; // For Darwin platforms we don't save VG for non-SVE functions, even if SME // is enabled with streaming mode changes. - if (!AFI->hasStreamingModeChanges()) - return false; auto &ST = MF.getSubtarget(); if (ST.isTargetDarwin()) return ST.hasSVE(); @@ -1484,8 +1502,7 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO, bool isVGInstruction(MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) { unsigned Opc = MBBI->getOpcode(); - if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI || - Opc == AArch64::UBFMXri) + if (Opc == AArch64::CNTD_XPiI) return true; if (!requiresGetVGCall(*MBBI->getMF())) @@ -1494,7 +1511,7 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI, if (Opc == AArch64::BL) return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG); - return Opc == AArch64::ORRXrr; + return Opc == TargetOpcode::COPY; } // Convert callee-save register save/restore instruction to do stack pointer @@ -1509,9 +1526,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( unsigned NewOpc; // If the function contains streaming mode changes, we expect instructions - // to calculate the value of VG before spilling. For locally-streaming - // functions, we need to do this for both the streaming and non-streaming - // vector length. Move past these instructions if necessary. + // to calculate the value of VG before spilling. Move past these instructions + // if necessary. 
MachineFunction &MF = *MBB.getParent(); if (requiresSaveVG(MF)) { auto &TLI = *MF.getSubtarget().getTargetLowering(); @@ -3475,7 +3491,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineFunction &MF = *MBB.getParent(); auto &TLI = *MF.getSubtarget().getTargetLowering(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - AArch64FunctionInfo *AFI = MF.getInfo(); bool NeedsWinCFI = needsWinCFI(MF); DebugLoc DL; SmallVector RegPairs; @@ -3544,48 +3559,34 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( } unsigned X0Scratch = AArch64::NoRegister; + auto RestoreX0 = make_scope_exit([&] { + if (X0Scratch != AArch64::NoRegister) + BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), AArch64::X0) + .addReg(X0Scratch) + .setMIFlag(MachineInstr::FrameSetup); + }); + if (Reg1 == AArch64::VG) { // Find an available register to store value of VG to. Reg1 = findScratchNonCalleeSaveRegister(&MBB, true); assert(Reg1 != AArch64::NoRegister); - SMEAttrs Attrs = AFI->getSMEFnAttrs(); - - if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() && - AFI->getStreamingVGIdx() == std::numeric_limits::max()) { - // For locally-streaming functions, we need to store both the streaming - // & non-streaming VG. Spill the streaming value first. - BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1) - .addImm(1) - .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1) - .addReg(Reg1) - .addImm(3) - .addImm(63) - .setMIFlag(MachineInstr::FrameSetup); - - AFI->setStreamingVGIdx(RPI.FrameIdx); - } else if (MF.getSubtarget().hasSVE()) { + if (MF.getSubtarget().hasSVE()) { BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1) .addImm(31) .addImm(1) .setMIFlag(MachineInstr::FrameSetup); - AFI->setVGIdx(RPI.FrameIdx); } else { const AArch64Subtarget &STI = MF.getSubtarget(); - if (llvm::any_of( - MBB.liveins(), - [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { - return STI.getRegisterInfo()->isSuperOrSubRegisterEq( - AArch64::X0, LiveIn.PhysReg); - })) + if (any_of(MBB.liveins(), + [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { + return STI.getRegisterInfo()->isSuperOrSubRegisterEq( + AArch64::X0, LiveIn.PhysReg); + })) { X0Scratch = Reg1; - - if (X0Scratch != AArch64::NoRegister) - BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1) - .addReg(AArch64::XZR) - .addReg(AArch64::X0, RegState::Undef) - .addReg(AArch64::X0, RegState::Implicit) + BuildMI(MBB, MI, DL, TII.get(TargetOpcode::COPY), X0Scratch) + .addReg(AArch64::X0) .setMIFlag(MachineInstr::FrameSetup); + } RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG; const uint32_t *RegMask = @@ -3596,7 +3597,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( .addReg(AArch64::X0, RegState::ImplicitDefine) .setMIFlag(MachineInstr::FrameSetup); Reg1 = AArch64::X0; - AFI->setVGIdx(RPI.FrameIdx); } } @@ -3691,13 +3691,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( if (RPI.isPaired()) MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector); } - - if (X0Scratch != AArch64::NoRegister) - BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0) - .addReg(AArch64::XZR) - .addReg(X0Scratch, RegState::Undef) - .addReg(X0Scratch, RegState::Implicit) - .setMIFlag(MachineInstr::FrameSetup); } return true; } @@ -4076,15 +4069,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // Increase the callee-saved stack size if the function has streaming mode // changes, as we will need to spill the value of the VG register. 
- // For locally streaming functions, we spill both the streaming and - // non-streaming VG value. - SMEAttrs Attrs = AFI->getSMEFnAttrs(); - if (requiresSaveVG(MF)) { - if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) - CSStackSize += 16; - else - CSStackSize += 8; - } + if (requiresSaveVG(MF)) + CSStackSize += 8; // Determine if a Hazard slot should be used, and increase the CSStackSize by // StackHazardSize if so. @@ -4235,29 +4221,13 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( // Insert VG into the list of CSRs, immediately before LR if saved. if (requiresSaveVG(MF)) { - std::vector VGSaves; - SMEAttrs Attrs = AFI->getSMEFnAttrs(); - - auto VGInfo = CalleeSavedInfo(AArch64::VG); - VGInfo.setRestored(false); - VGSaves.push_back(VGInfo); - - // Add VG again if the function is locally-streaming, as we will spill two - // values. - if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) - VGSaves.push_back(VGInfo); - - bool InsertBeforeLR = false; - - for (unsigned I = 0; I < CSI.size(); I++) - if (CSI[I].getReg() == AArch64::LR) { - InsertBeforeLR = true; - CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end()); - break; - } - - if (!InsertBeforeLR) - llvm::append_range(CSI, VGSaves); + CalleeSavedInfo VGInfo(AArch64::VG); + auto It = + find_if(CSI, [](auto &Info) { return Info.getReg() == AArch64::LR; }); + if (It != CSI.end()) + CSI.insert(It, VGInfo); + else + CSI.push_back(VGInfo); } Register LastReg = 0; @@ -5260,46 +5230,11 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, } } // namespace -static void emitVGSaveRestore(MachineBasicBlock::iterator II, - const AArch64FrameLowering *TFI) { - MachineInstr &MI = *II; - MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - - if (MI.getOpcode() != AArch64::VGSavePseudo && - MI.getOpcode() != AArch64::VGRestorePseudo) - return; - - auto *AFI = MF->getInfo(); - SMEAttrs FuncAttrs = AFI->getSMEFnAttrs(); - bool LocallyStreaming = - FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface(); - - int64_t VGFrameIdx = - LocallyStreaming ? 
AFI->getStreamingVGIdx() : AFI->getVGIdx(); - assert(VGFrameIdx != std::numeric_limits::max() && - "Expected FrameIdx for VG"); - - CFIInstBuilder CFIBuilder(*MBB, II, MachineInstr::NoFlags); - if (MI.getOpcode() == AArch64::VGSavePseudo) { - const MachineFrameInfo &MFI = MF->getFrameInfo(); - int64_t Offset = - MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea(); - CFIBuilder.buildOffset(AArch64::VG, Offset); - } else { - CFIBuilder.buildRestore(AArch64::VG); - } - - MI.eraseFromParent(); -} - void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS = nullptr) const { for (auto &BB : MF) for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) { - if (requiresSaveVG(MF)) - emitVGSaveRestore(II++, this); - else if (StackTaggingMergeSetTag) + if (StackTaggingMergeSetTag) II = tryMergeAdjacentSTG(II, this, RS); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fd91e52282475..d168cc8d1bd06 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9517,17 +9517,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue InGlue; if (RequiresSMChange) { - if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { - Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), Chain); - InGlue = Chain.getValue(1); - } - - SDValue NewChain = + Chain = changeStreamingMode(DAG, DL, CallAttrs.callee().hasStreamingInterface(), Chain, InGlue, getSMToggleCondition(CallAttrs)); - Chain = NewChain.getValue(0); - InGlue = NewChain.getValue(1); + InGlue = Chain.getValue(1); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -9712,13 +9705,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Result = changeStreamingMode( DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue, getSMToggleCondition(CallAttrs)); - - if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { - InGlue = Result.getValue(1); - Result = - DAG.getNode(AArch64ISD::VG_RESTORE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue}); - } } if (RequiresLazySave || CallAttrs.requiresEnablingZAAfterCall()) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d15f90deba74e..db028b4b7677c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5900,6 +5900,18 @@ static void appendReadRegExpr(SmallVectorImpl &Expr, unsigned RegNum) { Expr.push_back(0); } +// Convenience function to create a DWARF expression for loading a register from +// a CFA offset. +static void appendLoadRegExpr(SmallVectorImpl &Expr, + int64_t OffsetFromDefCFA) { + // This assumes the top of the DWARF stack contains the CFA. + Expr.push_back(dwarf::DW_OP_dup); + // Add the offset to the register. + appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus); + // Dereference the address (loads a 64 bit value).. + Expr.push_back(dwarf::DW_OP_deref); +} + // Convenience function to create a comment for // (+/-) NumBytes (* RegScale)? 
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, @@ -5968,9 +5980,10 @@ MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI, return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed()); } -MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI, - unsigned Reg, - const StackOffset &OffsetFromDefCFA) { +MCCFIInstruction +llvm::createCFAOffset(const TargetRegisterInfo &TRI, unsigned Reg, + const StackOffset &OffsetFromDefCFA, + std::optional IncomingVGOffsetFromDefCFA) { int64_t NumBytes, NumVGScaledBytes; AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets( OffsetFromDefCFA, NumBytes, NumVGScaledBytes); @@ -5989,9 +6002,16 @@ MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI, assert(NumVGScaledBytes && "Expected scalable offset"); SmallString<64> OffsetExpr; // + VG * NumVGScaledBytes - appendOffsetComment(NumVGScaledBytes, Comment, "* VG"); - appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true)); + StringRef VGRegScale; + if (IncomingVGOffsetFromDefCFA) { + appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA); + VGRegScale = "* IncomingVG"; + } else { + appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true)); + VGRegScale = "* VG"; + } appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul); + appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale); OffsetExpr.push_back(dwarf::DW_OP_plus); if (NumBytes) { // + NumBytes diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 70c814a3a48c9..179574a73aa01 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -646,8 +646,10 @@ bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable = true); -MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, - const StackOffset &OffsetFromDefCFA); +MCCFIInstruction +createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, + const StackOffset &OffsetFromDefCFA, + std::optional IncomingVGOffsetFromDefCFA); /// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg /// plus Offset. This is intended to be used from within the prolog/epilog diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index ed3374ae68d00..1fde87e65a34b 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -235,10 +235,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { // The PTRUE is used for the LD/ST of ZReg pairs in save and restore. unsigned PredicateRegForFillSpill = 0; - // The stack slots where VG values are stored to. - int64_t VGIdx = std::numeric_limits::max(); - int64_t StreamingVGIdx = std::numeric_limits::max(); - // Holds the SME function attributes (streaming mode, ZA/ZT0 state). 
SMEAttrs SMEFnAttrs; @@ -280,12 +276,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { unsigned isPStateSMRegUsed() const { return PStateSMRegUsed; }; void setPStateSMRegUsed(bool Used = true) { PStateSMRegUsed = Used; }; - int64_t getVGIdx() const { return VGIdx; }; - void setVGIdx(unsigned Idx) { VGIdx = Idx; }; - - int64_t getStreamingVGIdx() const { return StreamingVGIdx; }; - void setStreamingVGIdx(unsigned FrameIdx) { StreamingVGIdx = FrameIdx; }; - bool isSVECC() const { return IsSVECC; }; void setIsSVECC(bool s) { IsSVECC = s; }; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 5c4e0c1093187..125225df15464 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -48,12 +48,6 @@ let usesCustomInserter = 1 in { } def : Pat<(i64 (AArch64EntryPStateSM)), (EntryPStateSM)>; -def AArch64VGSave : SDNode<"AArch64ISD::VG_SAVE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; - -def AArch64VGRestore : SDNode<"AArch64ISD::VG_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; - //===----------------------------------------------------------------------===// // Old SME ABI lowering ISD nodes/pseudos (deprecated) //===----------------------------------------------------------------------===// @@ -362,16 +356,6 @@ def : Pat<(AArch64_smstart (i32 svcr_op:$pstate)), def : Pat<(AArch64_smstop (i32 svcr_op:$pstate)), (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>; - -// Pseudo to insert cfi_offset/cfi_restore instructions. Used to save or restore -// the streaming value of VG around streaming-mode changes in locally-streaming -// functions. -def VGSavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>; -def : Pat<(AArch64VGSave), (VGSavePseudo)>; - -def VGRestorePseudo : Pseudo<(outs), (ins), []>, Sched<[]>; -def : Pat<(AArch64VGRestore), (VGRestorePseudo)>; - //===----------------------------------------------------------------------===// // SME2 Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp b/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp index 564af6708e1ed..85cca1de47b78 100644 --- a/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp @@ -128,13 +128,6 @@ bool SMEPeepholeOpt::optimizeStartStopPairs( bool Changed = false; MachineInstr *Prev = nullptr; - SmallVector ToBeRemoved; - - // Convenience function to reset the matching of a sequence. - auto Reset = [&]() { - Prev = nullptr; - ToBeRemoved.clear(); - }; // Walk through instructions in the block trying to find pairs of smstart // and smstop nodes that cancel each other out. We only permit a limited @@ -156,14 +149,10 @@ bool SMEPeepholeOpt::optimizeStartStopPairs( // that we marked for deletion in between. Prev->eraseFromParent(); MI.eraseFromParent(); - for (MachineInstr *TBR : ToBeRemoved) - TBR->eraseFromParent(); - ToBeRemoved.clear(); Prev = nullptr; Changed = true; NumSMChangesRemoved += 2; } else { - Reset(); Prev = &MI; } continue; @@ -179,7 +168,7 @@ bool SMEPeepholeOpt::optimizeStartStopPairs( // of streaming mode. If not, the algorithm should reset. 
switch (MI.getOpcode()) { default: - Reset(); + Prev = nullptr; break; case AArch64::COALESCER_BARRIER_FPR16: case AArch64::COALESCER_BARRIER_FPR32: @@ -193,7 +182,7 @@ bool SMEPeepholeOpt::optimizeStartStopPairs( // concrete example/test-case. if (isSVERegOp(TRI, MRI, MI.getOperand(0)) || isSVERegOp(TRI, MRI, MI.getOperand(1))) - Reset(); + Prev = nullptr; break; case AArch64::ADJCALLSTACKDOWN: case AArch64::ADJCALLSTACKUP: @@ -201,12 +190,6 @@ bool SMEPeepholeOpt::optimizeStartStopPairs( case AArch64::ADDXri: // We permit these as they don't generate SVE/NEON instructions. break; - case AArch64::VGRestorePseudo: - case AArch64::VGSavePseudo: - // When the smstart/smstop are removed, we should also remove - // the pseudos that save/restore the VG value for CFI info. - ToBeRemoved.push_back(&MI); - break; case AArch64::MSRpstatesvcrImm1: case AArch64::MSRpstatePseudo: llvm_unreachable("Should have been handled"); diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll index eb23a0f77accf..0b8645f66b5f3 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sme-za-call-lowering.ll @@ -46,12 +46,10 @@ define void @requires_za_save_streaming_mode_change() nounwind "aarch64_inout_za ; CHECK-BEFORE-SMEABI: bb.0 (%ir-block.0): ; CHECK-BEFORE-SMEABI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-BEFORE-SMEABI-NEXT: RequiresZASavePseudo - ; CHECK-BEFORE-SMEABI-NEXT: VGSavePseudo ; CHECK-BEFORE-SMEABI-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit $vg, implicit-def $vg, implicit-def $fpmr ; CHECK-BEFORE-SMEABI-NEXT: BL @private_za_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp ; CHECK-BEFORE-SMEABI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-BEFORE-SMEABI-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr - ; CHECK-BEFORE-SMEABI-NEXT: VGRestorePseudo ; CHECK-BEFORE-SMEABI-NEXT: RET_ReallyLR ; ; CHECK-AFTER-SMEABI-LABEL: name: requires_za_save_streaming_mode_change @@ -66,12 +64,10 @@ define void @requires_za_save_streaming_mode_change() nounwind "aarch64_inout_za ; CHECK-AFTER-SMEABI-NEXT: MSR 56965, [[COPY1]] ; CHECK-AFTER-SMEABI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-AFTER-SMEABI-NEXT: RequiresZASavePseudo - ; CHECK-AFTER-SMEABI-NEXT: VGSavePseudo ; CHECK-AFTER-SMEABI-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp, implicit $vg, implicit-def $vg, implicit-def $fpmr ; CHECK-AFTER-SMEABI-NEXT: BL @private_za_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp ; CHECK-AFTER-SMEABI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-AFTER-SMEABI-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr - ; CHECK-AFTER-SMEABI-NEXT: VGRestorePseudo ; CHECK-AFTER-SMEABI-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv ; CHECK-AFTER-SMEABI-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv ; CHECK-AFTER-SMEABI-NEXT: $x0 = ADDXri %stack.0, 0, 0 diff --git a/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll b/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll index 94fe06733347a..22774ebf1a662 100644 --- 
a/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll +++ b/llvm/test/CodeGen/AArch64/outlining-with-streaming-mode-changes.ll @@ -7,11 +7,10 @@ define void @streaming_mode_change1() #0 { ; CHECK-LABEL: streaming_mode_change1: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm @@ -24,7 +23,6 @@ define void @streaming_mode_change1() #0 { ; ; OUTLINER-LABEL: streaming_mode_change1: ; OUTLINER-NOT: OUTLINED_FUNCTION_ -; call void @callee(); ret void; } @@ -33,11 +31,10 @@ define void @streaming_mode_change2() #0 { ; CHECK-LABEL: streaming_mode_change2: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm @@ -50,7 +47,6 @@ define void @streaming_mode_change2() #0 { ; ; OUTLINER-LABEL: streaming_mode_change2: ; OUTLINER-NOT: OUTLINED_FUNCTION_ -; call void @callee(); ret void; } @@ -59,11 +55,10 @@ define void @streaming_mode_change3() #0 { ; CHECK-LABEL: streaming_mode_change3: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm @@ -76,7 +71,6 @@ define void @streaming_mode_change3() #0 { ; ; OUTLINER-LABEL: streaming_mode_change3: ; OUTLINER-NOT: OUTLINED_FUNCTION_ -; call void @callee(); ret void; } diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll index 3579baae1d7d8..25a7b87d37d9e 100644 --- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll +++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll @@ -88,18 +88,14 @@ define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "a define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: mov x9, x0 +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_get_current_vg -; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: mov x0, x9 ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state_size ; CHECK-NEXT: sub sp, sp, x0 ; CHECK-NEXT: mov x20, sp @@ -123,12 +119,12 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: sub sp, x29, #64 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i64 @private_za_decl(i64 %v) %res2 = call i64 @private_za_decl(i64 %res) @@ -139,18 +135,14 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: mov x9, x0 +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_get_current_vg -; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: mov x0, x9 ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl __arm_sme_state_size @@ -188,12 +180,12 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee( ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: sub sp, x29, #64 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i64 @private_za_decl(i64 %v) %res2 = call i64 @private_za_decl(i64 %res) diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll index 1567ca258cccb..9bc5ee6988bcf 100644 --- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll +++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll @@ -10,13 +10,11 @@ target triple = "aarch64" define void @streaming_compatible() #0 { ; CHECK-LABEL: streaming_compatible: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: bl __arm_get_current_vg -; CHECK-NEXT: stp x0, x19, [sp, #72] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbz w19, #0, .LBB0_2 @@ -28,12 +26,11 @@ define void @streaming_compatible() #0 { ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @non_streaming() ret void @@ -47,14 +44,12 @@ declare void @non_streaming() define void @streaming_compatible_arg(float %f) #0 { ; CHECK-LABEL: streaming_compatible_arg: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: bl __arm_get_current_vg -; CHECK-NEXT: stp x0, x19, [sp, #88] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 @@ -68,13 +63,12 @@ define void @streaming_compatible_arg(float %f) #0 { ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret call void @non_streaming(float %f) ret void diff --git a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll index 1933eb85b77f2..8d6432ced8e1d 100644 --- a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll +++ b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll @@ -9,9 +9,8 @@ declare void @my_func2( %v) define void @fbyte( %v) #0{ ; NOPAIR-LABEL: fbyte: ; NOPAIR: // %bb.0: -; NOPAIR-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; NOPAIR-NEXT: cntd x9 -; NOPAIR-NEXT: stp x9, x19, [sp, #16] // 16-byte Folded Spill +; NOPAIR-NEXT: str x29, [sp, #-32]! 
// 8-byte Folded Spill +; NOPAIR-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-18 ; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -85,15 +84,14 @@ define void @fbyte( %v) #0{ ; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; NOPAIR-NEXT: addvl sp, sp, #18 -; NOPAIR-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload -; NOPAIR-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; NOPAIR-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; NOPAIR-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload ; NOPAIR-NEXT: ret ; ; PAIR-LABEL: fbyte: ; PAIR: // %bb.0: -; PAIR-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; PAIR-NEXT: cntd x9 -; PAIR-NEXT: stp x9, x19, [sp, #16] // 16-byte Folded Spill +; PAIR-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill +; PAIR-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-18 ; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -167,8 +165,8 @@ define void @fbyte( %v) #0{ ; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; PAIR-NEXT: addvl sp, sp, #18 -; PAIR-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload -; PAIR-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; PAIR-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; PAIR-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload ; PAIR-NEXT: ret call void @my_func2( %v) ret void @@ -177,9 +175,7 @@ define void @fbyte( %v) #0{ define void @fhalf( %v) #1{ ; NOPAIR-LABEL: fhalf: ; NOPAIR: // %bb.0: -; NOPAIR-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; NOPAIR-NEXT: cntd x9 -; NOPAIR-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; NOPAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-18 ; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -241,14 +237,12 @@ define void @fhalf( %v) #1{ ; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; NOPAIR-NEXT: addvl sp, sp, #18 -; NOPAIR-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; NOPAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; NOPAIR-NEXT: ret ; ; PAIR-LABEL: fhalf: ; PAIR: // %bb.0: -; PAIR-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; PAIR-NEXT: cntd x9 -; PAIR-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; PAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-18 ; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill ; PAIR-NEXT: ptrue pn8.b @@ -298,7 +292,7 @@ define void @fhalf( %v) #1{ ; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; PAIR-NEXT: addvl sp, sp, #18 -; PAIR-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; PAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; PAIR-NEXT: ret call void @my_func() ret void @@ -307,12 +301,7 @@ define void @fhalf( %v) #1{ define void @ffloat( %v) #2 { ; NOPAIR-LABEL: ffloat: ; NOPAIR: // %bb.0: -; NOPAIR-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; NOPAIR-NEXT: rdsvl x9, #1 -; NOPAIR-NEXT: lsr x9, x9, #3 -; NOPAIR-NEXT: str x9, [sp, #16] // 8-byte Folded Spill -; NOPAIR-NEXT: cntd x9 -; NOPAIR-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; NOPAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; NOPAIR-NEXT: addsvl sp, sp, #-18 ; NOPAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; NOPAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -374,17 +363,12 @@ define void @ffloat( %v) #2 { ; NOPAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; NOPAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; NOPAIR-NEXT: addsvl sp, sp, #18 -; NOPAIR-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; NOPAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; NOPAIR-NEXT: ret ; ; PAIR-LABEL: ffloat: ; PAIR: // %bb.0: -; PAIR-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; PAIR-NEXT: rdsvl x9, #1 -; PAIR-NEXT: lsr x9, x9, #3 -; PAIR-NEXT: str x9, [sp, #16] // 8-byte Folded Spill -; PAIR-NEXT: cntd x9 -; PAIR-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; PAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; PAIR-NEXT: addsvl sp, sp, #-18 ; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -446,7 +430,7 @@ define void @ffloat( %v) #2 { ; PAIR-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; PAIR-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; PAIR-NEXT: addsvl sp, sp, #18 -; PAIR-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; PAIR-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; PAIR-NEXT: ret call void @my_func() ret void diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll index a08e4896f5ee9..48ac156a43875 100644 --- a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll +++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll @@ -5,18 +5,17 @@ declare void @normal_callee(); define void @locally_streaming_fn() #0 { ; CHECK-LABEL: locally_streaming_fn: ; CHECK: ; %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
; 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: rdsvl x9, #1 -; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 -; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] ; 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #80] ; 8-byte Folded Spill +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] ; 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset vg, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -26,19 +25,19 @@ define void @locally_streaming_fn() #0 { ; CHECK-NEXT: .cfi_offset b14, -88 ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_offset vg, -24 -; CHECK-NEXT: smstop sm -; CHECK-NEXT: bl _normal_callee +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl _normal_callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg -; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] ; 8-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 759f3ee609e58..e1bfdddaba923 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -17,8 +17,6 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline ; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: cntd x9 -; CHECK-FISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill ; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-FISEL-NEXT: smstart sm ; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload @@ -45,8 +43,6 @@ define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline ; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: cntd x9 -; CHECK-GISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill ; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-GISEL-NEXT: smstart sm ; 
CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload @@ -80,8 +76,6 @@ define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: cntd x9 -; CHECK-COMMON-NEXT: str x9, [sp, #88] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm ; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload @@ -108,17 +102,12 @@ entry: define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" { ; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee: ; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: sub sp, sp, #128 +; CHECK-COMMON-NEXT: sub sp, sp, #112 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: rdsvl x9, #1 -; CHECK-COMMON-NEXT: lsr x9, x9, #3 -; CHECK-COMMON-NEXT: str x9, [sp, #104] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: cntd x9 -; CHECK-COMMON-NEXT: str x9, [sp, #112] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload @@ -140,7 +129,7 @@ define double @locally_streaming_caller_normal_callee(double %x) nounwind noinli ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: add sp, sp, #128 +; CHECK-COMMON-NEXT: add sp, sp, #112 ; CHECK-COMMON-NEXT: ret %call = call double @normal_callee(double %x); %add = fadd double %call, 4.200000e+01 @@ -177,16 +166,11 @@ define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noi define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" { ; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr: ; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: rdsvl x9, #1 -; CHECK-COMMON-NEXT: lsr x9, x9, #3 -; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: cntd x9 -; CHECK-COMMON-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: blr x0 ; CHECK-COMMON-NEXT: smstop sm @@ -194,7 +178,7 @@ define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noin ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret call void %p() "aarch64_pstate_sm_enabled" ret void @@ -208,8 +192,6 @@ define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optno ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: cntd x9 -; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: blr x0 ; CHECK-COMMON-NEXT: smstop sm @@ -339,22 +321,21 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw ; CHECK-COMMON-LABEL: f128_call_sm: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: sub sp, sp, #112 -; CHECK-COMMON-NEXT: cntd x9 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm ; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-COMMON-NEXT: bl __addtf3 ; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: add sp, sp, #112 @@ -403,22 +384,21 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw ; CHECK-COMMON-LABEL: frem_call_sm: ; CHECK-COMMON: // %bb.0: ; CHECK-COMMON-NEXT: sub sp, sp, #96 -; CHECK-COMMON-NEXT: cntd x9 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte 
Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl fmodf ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: add sp, sp, #96 @@ -431,14 +411,12 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind { ; CHECK-COMMON-LABEL: frem_call_sm_compat: ; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: sub sp, sp, #112 -; CHECK-COMMON-NEXT: cntd x9 +; CHECK-COMMON-NEXT: sub sp, sp, #96 ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: bl __arm_sme_state ; CHECK-COMMON-NEXT: mov x19, x0 @@ -453,14 +431,13 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati ; CHECK-COMMON-NEXT: // %bb.3: ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: .LBB12_4: +; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: add sp, sp, #112 +; CHECK-COMMON-NEXT: add sp, sp, #96 ; CHECK-COMMON-NEXT: ret %res = frem float %a, %b ret float %res diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll index e623d3fb075f7..c57cb8e0873d0 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -213,15 +213,13 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: test_lazy_save_and_conditional_smstart: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-112]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: rdsvl x8, #1 @@ -253,25 +251,23 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za ; CHECK-NEXT: .LBB3_6: ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: sub sp, x29, #64 -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret ; ; CHECK-NEWLOWERING-LABEL: test_lazy_save_and_conditional_smstart: ; CHECK-NEWLOWERING: // %bb.0: -; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: cntd x9 +; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: add x29, sp, #64 -; CHECK-NEWLOWERING-NEXT: str x9, [sp, #80] // 8-byte Folded Spill -; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 ; CHECK-NEWLOWERING-NEXT: mov x9, sp @@ -300,12 +296,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za ; CHECK-NEWLOWERING-NEXT: .LBB3_6: ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64 -; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload +; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret call void @private_za_callee() ret void diff --git a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll index 69f603458670c..2e198ad8f0d05 100644 --- a/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll +++ b/llvm/test/CodeGen/AArch64/sme-must-save-lr-for-vg.ll @@ -11,14 +11,12 @@ define void 
@foo() "aarch64_pstate_sm_body" { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 -; CHECK-NEXT: lsr x9, x9, #3 -; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_get_current_vg ; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset vg, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -29,13 +27,15 @@ define void @foo() "aarch64_pstate_sm_body" { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll index ff5b7c047eaf5..ab7c661d27187 100644 --- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll +++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll @@ -11,11 +11,10 @@ define void @test0(ptr %callee) nounwind { ; CHECK-LABEL: test0: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl callee_sm ; CHECK-NEXT: bl callee_sm @@ -36,11 +35,10 @@ define void @test1() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: bl callee @@ -60,13 +58,11 @@ define void @test1() nounwind "aarch64_pstate_sm_enabled" { define void @test2() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbz w19, #0, .LBB2_2 @@ -79,12 +75,11 @@ define void @test2() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @callee() call void @callee() @@ -95,13 +90,11 @@ define void @test2() nounwind "aarch64_pstate_sm_compatible" { define void @test3() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbnz w19, #0, .LBB3_2 @@ -131,12 +124,11 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: // %bb.11: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB3_12: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @callee_sm() call void @callee() @@ -149,11 +141,10 @@ define void @test4() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test4: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: fmov s0, wzr ; CHECK-NEXT: bl callee_farg @@ -176,12 +167,11 @@ define void @test5(float %f) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload @@ -205,12 +195,11 @@ define float @test6(float %f) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test6: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload @@ -218,10 +207,10 @@ define float @test6(float %f) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: bl callee_farg_fret ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -264,11 +253,10 @@ define void @test8() nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm @@ -289,11 +277,98 @@ define void @test8() nounwind "aarch64_pstate_sm_enabled" { define void @test9() "aarch64_pstate_sm_body" { ; CHECK-LABEL: test9: ; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ call void @callee()
+ ret void
+}
+
+; test that if the 'smstart' and 'smstop' are entirely removed in a locally
+; streaming function, we use the FP, not an expression to describe the CFA.
+define aarch64_sve_vector_pcs void @test9_1() "aarch64_pstate_sm_body" {
+; CHECK-LABEL: test9_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: addsvl sp, sp, #-18
+; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa w29, 32
+; CHECK-NEXT: .cfi_offset w28, -16
+; CHECK-NEXT: .cfi_offset w30, -24
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d8 @ cfa - 8 * VG - 32
+; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d9 @ cfa - 16 * VG - 32
+; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d10 @ cfa - 24 * VG - 32
+; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d11 @ cfa - 32 * VG - 32
+; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d12 @ cfa - 40 * VG - 32
+; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d13 @ cfa - 48 * VG - 32
+; CHECK-NEXT: .cfi_escape 
0x10, 0x4e, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d14 @ cfa - 56 * VG - 32 +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d15 @ cfa - 64 * VG - 32 ; CHECK-NEXT: bl callee -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addsvl sp, sp, #18 +; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @callee() ret void @@ -307,16 +382,15 @@ define void @test10() "aarch64_pstate_sm_body" { ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset vg, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -327,18 +401,18 @@ define void @test10() "aarch64_pstate_sm_body" { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: bl callee_sm -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -359,13 +433,11 @@ define void @test10() "aarch64_pstate_sm_body" { define void @test11(ptr %p) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test11: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee @@ -375,12 +447,11 @@ define void @test11(ptr %p) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @callee() store zeroinitializer, ptr %p @@ -396,16 +467,15 @@ define void @test12() "aarch64_pstate_sm_body" { ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset vg, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -416,20 +486,20 @@ define void @test12() "aarch64_pstate_sm_body" { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: smstop za -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: smstart za ; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -452,12 +522,11 @@ define void @test13(ptr %ptr) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: test13: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: mov x19, x0 @@ -475,8 +544,8 @@ define void @test13(ptr %ptr) nounwind "aarch64_pstate_sm_enabled" { ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll index 5ea5e3e7766e8..b947c943ba448 100644 --- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll +++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll @@ -16,12 +16,11 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov x19, x1 @@ -32,8 +31,8 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 { ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -49,12 +48,11 @@ define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov x19, x1 @@ -65,8 +63,8 @@ define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 { ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -82,12 +80,11 @@ define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov x19, x1 @@ -98,8 +95,8 @@ define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 { ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -115,12 +112,11 @@ define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov x19, x1 @@ -131,8 +127,8 @@ define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 { ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -148,12 +144,11 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 @@ -171,8 +166,8 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -188,12 +183,11 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 @@ -211,8 +205,8 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -228,12 +222,11 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -251,8 +244,8 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -273,12 +266,11 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -296,8 +288,8 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -314,12 +306,11 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -337,8 +328,8 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -355,12 +346,11 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -378,8 +368,8 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -396,12 +386,11 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -419,8 +408,8 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -437,12 +426,11 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 @@ -460,8 +448,8 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -478,12 +466,11 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -501,8 +488,8 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -519,12 +506,11 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -542,8 +528,8 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -564,12 +550,11 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -587,8 +572,8 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -604,12 +589,11 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -627,8 +611,8 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -644,12 +628,11 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -667,8 +650,8 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -684,12 +667,11 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -707,8 +689,8 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -724,12 +706,11 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -747,8 +728,8 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -764,12 +745,11 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -787,8 +767,8 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -804,12 +784,11 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -827,8 +806,8 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -844,12 +823,11 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 @@ -867,8 +845,8 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -887,12 +865,11 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_arg_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 @@ -901,10 +878,10 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl use_v8i1 @@ -914,8 +891,8 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { ; CHECK-NEXT: str p0, [x19] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -934,25 +911,22 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 { define void @dont_coalesce_res_i8(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i8 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i8 @get_i8() %vec = insertelement poison, i8 %res, i32 0 @@ -963,25 +937,22 @@ define void @dont_coalesce_res_i8(ptr %ptr) #0 { define void @dont_coalesce_res_i16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i16 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i16 @get_i16() %vec = insertelement poison, i16 %res, i32 0 @@ -992,25 +963,22 @@ define void @dont_coalesce_res_i16(ptr %ptr) #0 { define void @dont_coalesce_res_i32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i32 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i32 @get_i32() %vec = insertelement poison, i32 %res, i32 0 @@ -1021,25 +989,22 @@ define void @dont_coalesce_res_i32(ptr %ptr) #0 { define void @dont_coalesce_res_i64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_i64 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call i64 @get_i64() %vec = insertelement poison, i64 %res, i32 0 @@ -1050,29 +1015,26 @@ define void @dont_coalesce_res_i64(ptr %ptr) #0 { define void @dont_coalesce_res_f16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f16 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload -; CHECK-NEXT: // 
kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call half @get_f16() %vec = insertelement poison, half %res, i32 0 @@ -1083,14 +1045,12 @@ define void @dont_coalesce_res_f16(ptr %ptr) #0 { define void @dont_coalesce_res_f32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f32 @@ -1098,13 +1058,12 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call float @get_f32() %vec = insertelement poison, float %res, i32 0 @@ -1115,14 +1074,12 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 { define void @dont_coalesce_res_f64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_f64 @@ -1130,13 +1087,12 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; 
CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call double @get_f64() %vec = insertelement poison, double %res, i32 0 @@ -1151,14 +1107,12 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 { define void @dont_coalesce_res_v1i8(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i8 @@ -1166,13 +1120,12 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i8> @get_v1i8() %elt = extractelement <1 x i8> %res, i32 0 @@ -1184,14 +1137,12 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 { define void @dont_coalesce_res_v1i16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i16 @@ -1199,13 +1150,12 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i16> @get_v1i16() %elt = extractelement <1 x i16> %res, i32 0 @@ -1217,14 +1167,12 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 { define void @dont_coalesce_res_v1i32(ptr 
%ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i32 @@ -1232,13 +1180,12 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i32> @get_v1i32() %elt = extractelement <1 x i32> %res, i32 0 @@ -1250,14 +1197,12 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 { define void @dont_coalesce_res_v1i64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1i64 @@ -1265,13 +1210,12 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x i64> @get_v1i64() %elt = extractelement <1 x i64> %res, i32 0 @@ -1283,29 +1227,26 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 { define void @dont_coalesce_res_v1f16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte 
Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f16 ; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x half> @get_v1f16() %elt = extractelement <1 x half> %res, i32 0 @@ -1317,14 +1258,12 @@ define void @dont_coalesce_res_v1f16(ptr %ptr) #0 { define void @dont_coalesce_res_v1f32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f32 @@ -1332,13 +1271,12 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x float> @get_v1f32() %elt = extractelement <1 x float> %res, i32 0 @@ -1350,14 +1288,12 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 { define void @dont_coalesce_res_v1f64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v1f64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v1f64 @@ -1365,13 +1301,12 @@ define void 
@dont_coalesce_res_v1f64(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: str z0, [x19] ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <1 x double> @get_v1f64() %elt = extractelement <1 x double> %res, i32 0 @@ -1387,29 +1322,26 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 { define void @dont_coalesce_res_v16i8(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v16i8 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <16 x i8> @get_v16i8() %vec = call @llvm.vector.insert.nxv16i8.v16i8( poison, <16 x i8> %res, i64 0) @@ -1420,29 +1352,26 @@ define void @dont_coalesce_res_v16i8(ptr %ptr) #0 { define void @dont_coalesce_res_v8i16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v8i16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, 
x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <8 x i16> @get_v8i16() %vec = call @llvm.vector.insert.nxv8i16.v8i16( poison, <8 x i16> %res, i64 0) @@ -1453,29 +1382,26 @@ define void @dont_coalesce_res_v8i16(ptr %ptr) #0 { define void @dont_coalesce_res_v4i32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v4i32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <4 x i32> @get_v4i32() %vec = call @llvm.vector.insert.nxv4i32.v4i32( poison, <4 x i32> %res, i64 0) @@ -1486,29 +1412,26 @@ define void @dont_coalesce_res_v4i32(ptr %ptr) #0 { define void @dont_coalesce_res_v2i64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v2i64 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: 
ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <2 x i64> @get_v2i64() %vec = call @llvm.vector.insert.nxv2i64.v2i64( poison, <2 x i64> %res, i64 0) @@ -1519,29 +1442,26 @@ define void @dont_coalesce_res_v2i64(ptr %ptr) #0 { define void @dont_coalesce_res_v8f16(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v8f16 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <8 x half> @get_v8f16() %vec = call @llvm.vector.insert.nxv8f16.v8f16( poison, <8 x half> %res, i64 0) @@ -1552,29 +1472,26 @@ define void @dont_coalesce_res_v8f16(ptr %ptr) #0 { define void @dont_coalesce_res_v4f32(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v4f32 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <4 x float> @get_v4f32() 
%vec = call @llvm.vector.insert.nxv4f32.v4f32( poison, <4 x float> %res, i64 0) @@ -1585,29 +1502,26 @@ define void @dont_coalesce_res_v4f32(ptr %ptr) #0 { define void @dont_coalesce_res_v2f64(ptr %ptr) #0 { ; CHECK-LABEL: dont_coalesce_res_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl get_v2f64 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <2 x double> @get_v2f64() %vec = call @llvm.vector.insert.nxv2f64.v2f64( poison, <2 x double> %res, i64 0) diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll index 52078941aa745..39ea180e7ed81 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll @@ -8,15 +8,11 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: sm_body_sm_compatible_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: tbnz w0, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: @@ -31,7 +27,7 @@ define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aar ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret ret float zeroinitializer } @@ -39,15 +35,11 @@ define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aar define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbnz w19, #0, .LBB1_2 @@ -61,12 +53,11 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @normal_callee() ret void @@ -76,16 +67,12 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: streaming_body_and_streaming_compatible_interface_multi_basic_block: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbnz w19, #0, .LBB2_2 @@ -99,12 +86,11 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block ; CHECK-NEXT: // %bb.4: // %if.else ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB2_5: // %if.else +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_6: // %if.then ; CHECK-NEXT: smstop sm @@ -114,12 +100,11 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block ; CHECK-NEXT: // %bb.7: // %if.then ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB2_8: // %if.then +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %x, 0 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll index dd336e0f2e686..a3ec2ddb2b872 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll @@ -8,15 +8,11 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; define void @locally_streaming_caller_streaming_callee() "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_streaming_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: bl streaming_compatible_callee @@ -25,7 +21,7 @@ define void @locally_streaming_caller_streaming_callee() "aarch64_pstate_sm_body ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @streaming_compatible_callee(); @@ -51,33 +47,26 @@ define void @streaming_and_locally_streaming_caller_streaming_callee() "aarch64_ define void @locally_streaming_multiple_exit(i64 %cond) "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_multiple_exit: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: rdsvl x9, #1 -; CHECK-NEXT: lsr x9, x9, #3 -; CHECK-NEXT: str x9, [sp, #-80]! // 8-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: cmp x0, #1 ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: // %bb.1: // %if.else ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_2: // %if.end ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -98,16 +87,11 @@ if.end: define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_no_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: 
rdsvl x9, #1 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #24] // 8-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: index z0.d, #0, #1 @@ -118,12 +102,12 @@ define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_psta ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret %add = add <2 x i64> %a, ; @@ -155,16 +139,12 @@ define void @locally_streaming_caller_locally_streaming_callee() "aarch64_pstate define <2 x i64> @locally_streaming_caller_compatible_callee_vec_args_ret(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_compatible_callee_vec_args_ret: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: sub sp, sp, #96 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -177,7 +157,7 @@ define <2 x i64> @locally_streaming_caller_compatible_callee_vec_args_ret(<2 x i ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <2 x i64> @streaming_compatible_callee_vec_args_ret(<2 x i64> %a) "aarch64_pstate_sm_compatible" ret <2 x i64> %res; @@ -188,16 +168,12 @@ declare <2 x i64> @streaming_compatible_callee_vec_args_ret(<2 x i64>) "aarch64_ define {<2 x i64>, <2 x i64>} @locally_streaming_caller_compatible_callee_struct_arg_ret({<2 x i64>, <2 x i64>} %arg) "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_compatible_callee_struct_arg_ret: ; 
CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #128 -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: sub sp, sp, #112 ; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #112] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -210,7 +186,7 @@ define {<2 x i64>, <2 x i64>} @locally_streaming_caller_compatible_callee_struct ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %v1.arg = extractvalue {<2 x i64>, <2 x i64>} %arg, 1 %res = call {<2 x i64>, <2 x i64>} @streaming_compatible_callee_vec_arg_struct_ret(<2 x i64> %v1.arg) "aarch64_pstate_sm_compatible" @@ -224,16 +200,11 @@ declare {<2 x i64>, <2 x i64>} @streaming_compatible_callee_vec_arg_struct_ret(< define void @locally_streaming_caller_alloca() nounwind "aarch64_pstate_sm_body" { ; CHECK-LABEL: locally_streaming_caller_alloca: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #88] // 8-byte Folded Spill ; CHECK-NEXT: addsvl sp, sp, #-1 ; CHECK-NEXT: smstart sm ; CHECK-NEXT: mov x0, sp @@ -244,7 +215,7 @@ define void @locally_streaming_caller_alloca() nounwind "aarch64_pstate_sm_body" ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %alloca = alloca call void @use_ptr(ptr %alloca) "aarch64_pstate_sm_compatible" @@ -271,16 +242,11 @@ declare double @llvm.cos.f64(double) define float @test_arg_survives_loop(float %arg, i32 %N) nounwind "aarch64_pstate_sm_body" { ; CHECK-LABEL: test_arg_survives_loop: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: rdsvl x9, #1 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #24] // 8-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; 
CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB9_1: // %for.body @@ -293,12 +259,12 @@ define float @test_arg_survives_loop(float %arg, i32 %N) nounwind "aarch64_pstat ; CHECK-NEXT: fadd s0, s1, s0 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret entry: br label %for.body @@ -318,15 +284,11 @@ for.cond.cleanup: define void @disable_tailcallopt() "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: disable_tailcallopt: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: smstop sm @@ -334,7 +296,7 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_body" nounwind { ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret tail call void @streaming_compatible_callee(); ret void; diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll index 636c3ece9d411..ff4f36363edcf 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -36,13 +36,11 @@ define void @normal_caller_streaming_compatible_callee() nounwind { define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: streaming_compatible_caller_normal_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbz w19, #0, .LBB1_2 @@ -54,12 +52,11 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @normal_callee(); @@ -75,13 +72,11 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: streaming_compatible_caller_streaming_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbnz w19, #0, .LBB2_2 @@ -93,12 +88,11 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @streaming_callee(); @@ -130,12 +124,11 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-LABEL: streaming_compatible_with_neon_vectors: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: add x8, sp, #16 @@ -167,8 +160,8 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -183,9 +176,8 @@ declare <2 x double> @normal_callee_vec_arg(<2 x double>) define @streaming_compatible_with_scalable_vectors( %arg) "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: streaming_compatible_with_scalable_vectors: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: stp x9, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -263,8 +255,8 @@ define @streaming_compatible_with_scalable_vectors( @normal_callee_scalable_vec_arg( %arg) %fadd = fadd %res, %arg @@ -276,9 +268,8 @@ declare @normal_callee_scalable_vec_arg( @streaming_compatible_with_predicate_vectors( %arg) "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: streaming_compatible_with_predicate_vectors: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: stp x9, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -356,8 +347,8 @@ define @streaming_compatible_with_predicate_vectors( @normal_callee_predicate_vec_arg( %arg) %and = and %res, %arg @@ -369,13 +360,11 @@ declare @normal_callee_predicate_vec_arg() define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: conditional_smstart_unreachable_block: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbnz w19, #0, .LBB7_2 @@ -383,10 +372,6 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: bl streaming_callee -; CHECK-NEXT: tbnz w19, #0, .LBB7_4 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: smstop sm -; CHECK-NEXT: .LBB7_4: call void @streaming_callee() unreachable } @@ -394,16 +379,14 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: conditional_smstart_no_successor_block: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: tbz w8, #0, .LBB8_6 +; CHECK-NEXT: tbz w8, #0, .LBB8_5 ; CHECK-NEXT: // %bb.1: // %if.then ; CHECK-NEXT: tbnz w0, #0, .LBB8_3 ; CHECK-NEXT: // %bb.2: // %if.then @@ -414,14 +397,12 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co ; CHECK-NEXT: tbnz w19, #0, .LBB8_5 ; CHECK-NEXT: // %bb.4: // %if.then ; CHECK-NEXT: smstop sm -; CHECK-NEXT: .LBB8_5: // %if.then -; CHECK-NEXT: .LBB8_6: // %exit +; CHECK-NEXT: .LBB8_5: // %exit +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret br i1 %p, label %if.then, label %exit @@ -436,13 +417,11 @@ exit: define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind { ; CHECK-LABEL: disable_tailcallopt: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: tbz w19, #0, .LBB9_2 @@ -454,12 +433,11 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind { ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret tail call void @normal_callee(); @@ -476,10 +454,12 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #112] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x9, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -494,7 +474,6 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: tbz w19, #0, .LBB10_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: smstop sm @@ -508,17 +487,18 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: // %bb.3: // %entry ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB10_4: // %entry -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #120] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #112] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll index 438b941198449..8c4d57e244e03 
100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll @@ -22,11 +22,10 @@ define void @normal_caller_streaming_callee() nounwind { ; CHECK-LABEL: normal_caller_streaming_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm @@ -48,11 +47,10 @@ define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enable ; CHECK-LABEL: streaming_caller_normal_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl normal_callee ; CHECK-NEXT: smstart sm @@ -105,11 +103,10 @@ define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind { ; CHECK-LABEL: call_to_function_pointer_streaming_enabled: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: blr x0 ; CHECK-NEXT: smstop sm @@ -128,20 +125,19 @@ define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind { ; CHECK-LABEL: smstart_clobber_simdfp: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -154,9 +150,7 @@ define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind { define @smstart_clobber_sve( %x) nounwind { ; CHECK-LABEL: smstart_clobber_sve: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -222,7 +216,7 @@ define @smstart_clobber_sve( %x) nounwind { ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @streaming_callee() ret %x; @@ -233,9 +227,7 @@ define @smstart_clobber_sve( %x) nounwind { define @smstart_clobber_sve_duplicate( %x) nounwind { ; CHECK-LABEL: smstart_clobber_sve_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 -; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-18 ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill @@ -302,7 +294,7 @@ define @smstart_clobber_sve_duplicate( %x) ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @streaming_callee() call void @streaming_callee() @@ -314,12 +306,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta ; CHECK-LABEL: call_to_intrinsic_without_chain: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: stp d0, d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp] // 8-byte Folded Reload @@ -327,11 +318,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta ; CHECK-NEXT: str d0, [sp] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d1, d0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fadd d0, d1, d0 -; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fadd d0, d1, d0 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret @@ -349,11 +340,10 @@ define void @disable_tailcallopt() nounwind { ; CHECK-LABEL: disable_tailcallopt: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm @@ -370,13 +360,11 @@ define void @disable_tailcallopt() nounwind { define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 { ; CHECK-LABEL: call_to_non_streaming_pass_sve_objects: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: rdsvl x3, #1 ; CHECK-NEXT: addvl x0, sp, #2 @@ -392,7 +380,7 @@ define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: %Data1 = alloca , align 16 @@ -409,12 +397,11 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-LABEL: call_to_non_streaming_pass_args: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill ; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll new file mode 100644 index 0000000000000..991776f11ae40 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll @@ -0,0 +1,308 @@ +; DEFINE: %{compile} = llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sme -mattr=+sve -verify-machineinstrs -enable-aarch64-sme-peephole-opt=false < %s +; RUN: %{compile} | FileCheck %s +; RUN: %{compile} -filetype=obj -o %t +; RUN: llvm-objdump --dwarf=frames %t | FileCheck %s --check-prefix=UNWINDINFO + +; This tests that functions with streaming mode changes explicitly use the +; "IncomingVG" (the value of VG on entry to the function) in SVE unwind information.
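+;
+; As a rough illustration (a sketch based on the CHECK and UNWINDINFO lines
+; below, not additional test output): for a frame where the CFA is described as
+; FP + 32 and the incoming VG is spilled at CFA - 16, the save location of d8
+; (DWARF reg72) is described by an expression of the form:
+;
+;   DW_OP_dup                               ; push a second copy of the CFA
+;   DW_OP_lit16, DW_OP_minus, DW_OP_deref   ; load IncomingVG from the slot at CFA - 16
+;   DW_OP_consts -8, DW_OP_mul, DW_OP_plus  ; CFA - 8 * IncomingVG
+;   DW_OP_consts -32, DW_OP_plus            ; CFA - 8 * IncomingVG - 32
+;
+; so, for example, with an incoming VG of 2 (a 128-bit vector length on entry),
+; d8 is found at CFA - 48, independent of the value VG may have at the point of
+; unwinding.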
+; +; [ ] N -> S (Normal -> Streaming, mode change) +; [ ] S -> N (Streaming -> Normal, mode change) +; [ ] N -> N (Normal -> Normal, no mode change) +; [ ] S -> S (Streaming -> Streaming, no mode change) +; [ ] LS -> S (Locally-streaming -> Streaming, mode change) +; [ ] SC -> S (Streaming-compatible -> Streaming, mode change) + +declare void @normal_callee() +declare void @streaming_callee() "aarch64_pstate_sm_enabled" + +; [x] N -> S +; [ ] S -> N +; [ ] N -> N +; [ ] S -> S +; [ ] LS -> S +; [ ] SC -> S +define aarch64_sve_vector_pcs void @normal_caller_streaming_callee() { +; CHECK-LABEL: normal_caller_streaming_callee: +; CHECK: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK: .cfi_def_cfa_offset 32 +; CHECK: cntd x9 +; CHECK: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK: mov x29, sp +; CHECK: .cfi_def_cfa w29, 32 +; CHECK: .cfi_offset vg, -16 +; CHECK: addvl sp, sp, #-18 +; CHECK: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK: .cfi_escape 0x10, 0x48, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d8 @ cfa - 8 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x49, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d9 @ cfa - 16 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4a, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d10 @ cfa - 24 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4b, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d11 @ cfa - 32 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4c, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d12 @ cfa - 40 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4d, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d13 @ cfa - 48 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4e, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d14 @ cfa - 56 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4f, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d15 @ cfa - 64 * IncomingVG - 32 +; CHECK: smstart sm +; CHECK: bl streaming_callee +; CHECK: smstop sm +; +; UNWINDINFO: DW_CFA_def_cfa: reg29 +32 +; UNWINDINFO: DW_CFA_offset: reg46 -16 +; UNWINDINFO: DW_CFA_expression: reg72 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -8, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -40, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg77 
DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -48, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -56, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -64, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus + call void @streaming_callee() + ret void +} + +; [ ] N -> S +; [x] S -> N +; [ ] N -> N +; [ ] S -> S +; [ ] LS -> S +; [ ] SC -> S +define aarch64_sve_vector_pcs void @streaming_caller_normal_callee() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: streaming_caller_normal_callee: +; CHECK: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK: .cfi_def_cfa_offset 32 +; CHECK: cntd x9 +; CHECK: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK: mov x29, sp +; CHECK: .cfi_def_cfa w29, 32 +; CHECK: .cfi_offset vg, -16 +; CHECK: addvl sp, sp, #-18 +; CHECK: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK: .cfi_escape 0x10, 0x48, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d8 @ cfa - 8 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x49, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d9 @ cfa - 16 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4a, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d10 @ cfa - 24 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4b, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d11 @ cfa - 32 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4c, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d12 @ cfa - 40 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4d, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d13 @ cfa - 48 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4e, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d14 @ cfa - 56 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4f, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d15 @ cfa - 64 * IncomingVG - 32 +; CHECK: smstop sm +; CHECK: bl normal_callee +; CHECK: smstart sm +; +; UNWINDINFO: DW_CFA_def_cfa: reg29 +32 +; UNWINDINFO: DW_CFA_offset: reg46 -16 +; UNWINDINFO: DW_CFA_expression: reg72 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -8, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -40, 
DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -48, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -56, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -64, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus + call void @normal_callee() + ret void +} + +; [ ] N -> S +; [ ] S -> N +; [x] N -> N +; [ ] S -> S +; [ ] LS -> S +; [ ] SC -> S +define aarch64_sve_vector_pcs void @normal_caller_normal_callee() { +; CHECK-LABEL: normal_caller_normal_callee: +; CHECK: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK: addvl sp, sp, #-18 +; CHECK: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK: .cfi_escape 0x0f, 0x0a, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 16 + 144 * VG +; CHECK: .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x49, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d9 @ cfa - 16 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4a, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x40, 0x1c // $d10 @ cfa - 24 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4b, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x40, 0x1c // $d11 @ cfa - 32 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4c, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x40, 0x1c // $d12 @ cfa - 40 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4d, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x40, 0x1c // $d13 @ cfa - 48 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 +; CHECK: bl normal_callee +; +; UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_consts +144, DW_OP_mul, DW_OP_plus +; UNWINDINFO: DW_CFA_expression: reg72 DW_OP_bregx 0x2e +0, DW_OP_consts -8, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_bregx 0x2e +0, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_bregx 0x2e +0, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_bregx 0x2e +0, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_bregx 0x2e +0, DW_OP_consts -40, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_bregx 0x2e +0, DW_OP_consts -48, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_bregx 0x2e +0, DW_OP_consts -56, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: 
DW_CFA_expression: reg79 DW_OP_bregx 0x2e +0, DW_OP_consts -64, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus + call void @normal_callee() + ret void +} + +; [ ] N -> S +; [ ] S -> N +; [ ] N -> N +; [x] S -> S +; [ ] LS -> S +; [ ] SC -> S +define aarch64_sve_vector_pcs void @streaming_caller_streaming_callee() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: streaming_caller_streaming_callee: +; CHECK: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK: addvl sp, sp, #-18 +; CHECK: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK: .cfi_escape 0x0f, 0x0a, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 16 + 144 * VG +; CHECK: .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x49, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d9 @ cfa - 16 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4a, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x40, 0x1c // $d10 @ cfa - 24 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4b, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x40, 0x1c // $d11 @ cfa - 32 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4c, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x40, 0x1c // $d12 @ cfa - 40 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4d, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x40, 0x1c // $d13 @ cfa - 48 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4e, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x40, 0x1c // $d14 @ cfa - 56 * VG - 16 +; CHECK: .cfi_escape 0x10, 0x4f, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x40, 0x1c // $d15 @ cfa - 64 * VG - 16 +; CHECK: bl streaming_callee +; +; UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_consts +144, DW_OP_mul, DW_OP_plus +; UNWINDINFO: DW_CFA_expression: reg72 DW_OP_bregx 0x2e +0, DW_OP_consts -8, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_bregx 0x2e +0, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_bregx 0x2e +0, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_bregx 0x2e +0, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_bregx 0x2e +0, DW_OP_consts -40, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_bregx 0x2e +0, DW_OP_consts -48, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_bregx 0x2e +0, DW_OP_consts -56, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus +; UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_bregx 0x2e +0, DW_OP_consts -64, DW_OP_mul, DW_OP_plus, DW_OP_lit16, DW_OP_minus + call void @streaming_callee() + ret void +} + +; [ ] N -> S +; [ ] S -> N +; [ ] N -> N +; [ ] S -> S +; [x] LS -> S +; [ ] SC -> S +define aarch64_sve_vector_pcs void @locally_streaming() "aarch64_pstate_sm_body" { +; CHECK-LABEL: locally_streaming: +; CHECK: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK: .cfi_def_cfa_offset 32 +; CHECK: cntd x9 +; CHECK: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK: mov x29, sp +; CHECK: .cfi_def_cfa w29, 32 +; CHECK: .cfi_offset vg, -16 +; CHECK: addsvl sp, sp, #-18 +; CHECK: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK: .cfi_escape 0x10, 0x48, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d8 @ cfa - 8 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x49, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d9 @ cfa - 16 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4a, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d10 @ cfa - 24 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4b, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d11 @ cfa - 32 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4c, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d12 @ cfa - 40 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4d, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d13 @ cfa - 48 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4e, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d14 @ cfa - 56 * IncomingVG - 32 +; CHECK: .cfi_escape 0x10, 0x4f, 0x0b, 0x12, 0x40, 0x1c, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d15 @ cfa - 64 * IncomingVG - 32 +; CHECK: smstart sm +; CHECK: bl streaming_callee +; CHECK: smstop sm +; +; UNWINDINFO: DW_CFA_def_cfa: reg29 +32 +; UNWINDINFO: DW_CFA_offset: reg46 -16 +; UNWINDINFO: DW_CFA_expression: reg72 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -8, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -40, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -48, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -56, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_dup, DW_OP_lit16, DW_OP_minus, DW_OP_deref, DW_OP_consts -64, DW_OP_mul, DW_OP_plus, DW_OP_consts -32, DW_OP_plus + call void @streaming_callee() + ret void +} + +; [ ] N -> S +; [ ] S -> N +; [ ] N -> N +; [ ] S -> S +; [ ] LS -> S +; [x] SC -> S +define aarch64_sve_vector_pcs void 
@streaming_compatible_caller_conditional_mode_switch() "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: streaming_compatible_caller_conditional_mode_switch: +; CHECK: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill +; CHECK: .cfi_def_cfa_offset 48 +; CHECK: cntd x9 +; CHECK: stp x28, x19, [sp, #32] // 16-byte Folded Spill +; CHECK: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK: mov x29, sp +; CHECK: .cfi_def_cfa w29, 48 +; CHECK: .cfi_offset vg, -32 +; CHECK: addvl sp, sp, #-18 +; CHECK: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d9 @ cfa - 16 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d10 @ cfa - 24 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d11 @ cfa - 32 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d12 @ cfa - 40 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * IncomingVG - 48 +; CHECK: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * IncomingVG - 48 +; CHECK: bl __arm_sme_state +; CHECK: mov x19, x0 +; CHECK: tbnz w19, #0, .LBB5_2 +; CHECK: smstart sm +; CHECK: .LBB5_2: +; CHECK: bl streaming_callee +; CHECK: tbnz w19, #0, .LBB5_4 +; CHECK: smstop sm +; CHECK: .LBB5_4: +; +; UNWINDINFO: DW_CFA_def_cfa: reg29 +48 +; UNWINDINFO: DW_CFA_offset: reg46 -32 +; UNWINDINFO: DW_CFA_expression: reg72 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -8, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -16, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -24, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -32, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -40, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -48, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_dup, DW_OP_consts -32, 
DW_OP_plus, DW_OP_deref, DW_OP_consts -56, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus +; UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_dup, DW_OP_consts -32, DW_OP_plus, DW_OP_deref, DW_OP_consts -64, DW_OP_mul, DW_OP_plus, DW_OP_consts -48, DW_OP_plus + call void @streaming_callee() + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll index fe3f493353b50..7efa1d8f7a6a7 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll @@ -15,12 +15,11 @@ define void @test_no_stackslot_scavenging(float %f) #0 { ; CHECK-LABEL: test_no_stackslot_scavenging: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x9, x24, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill @@ -32,8 +31,8 @@ define void @test_no_stackslot_scavenging(float %f) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x24, [sp, #88] // 8-byte Folded Reload +; CHECK-NEXT: ldp x30, x24, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload @@ -48,21 +47,20 @@ define void @test_no_stackslot_scavenging(float %f) #0 { define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-pointer"="all" { ; CHECK-LABEL: test_no_stackslot_scavenging_with_fp: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-128]! // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d15, d14, [sp, #-112]! 
// 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: stp x28, x25, [sp, #96] // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x25, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: lsl x9, x0, #3 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: mov x19, sp -; CHECK-NEXT: str s0, [x29, #28] // 4-byte Folded Spill +; CHECK-NEXT: str s0, [x19, #12] // 4-byte Folded Spill ; CHECK-NEXT: add x9, x9, #15 ; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 ; CHECK-NEXT: sub x8, x8, x9 @@ -70,17 +68,17 @@ define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-po ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr s0, [x29, #28] // 4-byte Folded Reload +; CHECK-NEXT: ldr s0, [x19, #12] // 4-byte Folded Reload ; CHECK-NEXT: bl use_f ; CHECK-NEXT: smstart sm ; CHECK-NEXT: sub sp, x29, #64 -; CHECK-NEXT: ldp x24, x19, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x28, x25, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x25, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #128 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload ; CHECK-NEXT: ret %ptr2 = alloca i64, i64 %n, align 8 %ptr = alloca diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll index 60d8987334c89..dec8eb0d8a936 100644 --- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll +++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll @@ -16,34 +16,37 @@ declare void @streaming_callee_with_arg(i32) #0; define void @vg_unwind_simple() #0 { ; CHECK-LABEL: vg_unwind_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_offset b8, -24 -; CHECK-NEXT: .cfi_offset b9, -32 -; CHECK-NEXT: .cfi_offset b10, -40 -; CHECK-NEXT: .cfi_offset b11, -48 -; CHECK-NEXT: .cfi_offset b12, -56 -; CHECK-NEXT: .cfi_offset b13, -64 -; CHECK-NEXT: .cfi_offset b14, -72 -; CHECK-NEXT: .cfi_offset b15, -80 -; CHECK-NEXT: .cfi_offset vg, -8 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -66,6 +69,7 @@ define void @vg_unwind_simple() #0 { ; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -76,11 +80,9 @@ define void @vg_unwind_simple() #0 { ; FP-CHECK-NEXT: .cfi_offset b13, -80 ; FP-CHECK-NEXT: .cfi_offset b14, -88 ; FP-CHECK-NEXT: .cfi_offset b15, -96 -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: bl callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload @@ -88,6 +90,7 @@ define void @vg_unwind_simple() #0 { ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -102,7 +105,6 @@ define void @vg_unwind_simple() #0 { ; ; OUTLINER-CHECK-LABEL: vg_unwind_simple: ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_ -; call void @callee(); ret void; } @@ -119,10 +121,12 @@ define void @vg_unwind_needs_gap() #0 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 
16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x20, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x9, x20, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w20, -8 +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -133,20 +137,20 @@ define void @vg_unwind_needs_gap() #0 { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x20, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -170,6 +174,7 @@ define void @vg_unwind_needs_gap() #0 { ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset w20, -8 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -182,11 +187,9 @@ define void @vg_unwind_needs_gap() #0 { ; FP-CHECK-NEXT: .cfi_offset b15, -96 ; FP-CHECK-NEXT: //APP ; FP-CHECK-NEXT: //NO_APP -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: bl callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldr x20, [sp, #88] // 8-byte Folded Reload @@ -196,6 +199,7 @@ define void @vg_unwind_needs_gap() #0 { ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 ; FP-CHECK-NEXT: .cfi_restore w20 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -219,38 +223,41 @@ define void @vg_unwind_needs_gap() #0 { define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; CHECK-LABEL: vg_unwind_with_fixed_args: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: sub sp, sp, #112 +; CHECK-NEXT: .cfi_def_cfa_offset 112 ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_offset b8, -24 -; CHECK-NEXT: .cfi_offset b9, -32 -; CHECK-NEXT: .cfi_offset b10, -40 -; CHECK-NEXT: .cfi_offset b11, -48 -; 
CHECK-NEXT: .cfi_offset b12, -56 -; CHECK-NEXT: .cfi_offset b13, -64 -; CHECK-NEXT: .cfi_offset b14, -72 -; CHECK-NEXT: .cfi_offset b15, -80 +; CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #96] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset vg, -8 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl fixed_callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -274,6 +281,7 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; FP-CHECK-NEXT: str x9, [sp, #96] // 8-byte Folded Spill ; FP-CHECK-NEXT: add x29, sp, #80 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -285,12 +293,10 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; FP-CHECK-NEXT: .cfi_offset b14, -88 ; FP-CHECK-NEXT: .cfi_offset b15, -96 ; FP-CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; FP-CHECK-NEXT: bl fixed_callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 112 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload @@ -299,6 +305,7 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { ; FP-CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; FP-CHECK-NEXT: add sp, sp, #112 ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -321,15 +328,19 @@ define void @vg_unwind_with_fixed_args(<4 x i32> %x) #0 { define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-LABEL: vg_unwind_with_sve_args: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x29, x30, [sp, #-48]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset w28, -8 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 48 +; CHECK-NEXT: .cfi_offset w27, -8 +; CHECK-NEXT: .cfi_offset w28, -16 +; CHECK-NEXT: .cfi_offset vg, -32 +; CHECK-NEXT: .cfi_offset w30, -40 +; CHECK-NEXT: .cfi_offset w29, -48 ; CHECK-NEXT: addvl sp, sp, #-18 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 32 + 144 * VG ; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill @@ -352,27 +363,23 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d8 @ cfa - 8 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d9 @ cfa - 16 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d10 @ cfa - 24 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d11 @ cfa - 32 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d12 @ cfa - 40 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d13 @ cfa - 48 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d14 @ cfa - 56 * VG - 32 -; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x60, 0x22 // $d15 @ cfa - 64 * VG - 32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d9 @ cfa - 16 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d10 @ cfa - 24 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d11 @ cfa - 32 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d12 @ cfa - 40 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * IncomingVG - 48 +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * IncomingVG - 48 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x20, 0x92, 0x2e, 
0x00, 0x11, 0x98, 0x01, 0x1e, 0x22 // sp + 32 + 152 * VG -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z0, [x29, #-19, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z0, [x29, #-19, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: bl scalable_callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x20, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 32 + 144 * VG ; CHECK-NEXT: ptrue pn8.b ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload @@ -396,7 +403,6 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 -; CHECK-NEXT: .cfi_def_cfa wsp, 32 ; CHECK-NEXT: .cfi_restore z8 ; CHECK-NEXT: .cfi_restore z9 ; CHECK-NEXT: .cfi_restore z10 @@ -405,10 +411,13 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; CHECK-NEXT: .cfi_restore z13 ; CHECK-NEXT: .cfi_restore z14 ; CHECK-NEXT: .cfi_restore z15 -; CHECK-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload -; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa wsp, 48 +; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w27 ; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret @@ -424,6 +433,7 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; FP-CHECK-NEXT: .cfi_def_cfa w29, 48 ; FP-CHECK-NEXT: .cfi_offset w27, -8 ; FP-CHECK-NEXT: .cfi_offset w28, -16 +; FP-CHECK-NEXT: .cfi_offset vg, -32 ; FP-CHECK-NEXT: .cfi_offset w30, -40 ; FP-CHECK-NEXT: .cfi_offset w29, -48 ; FP-CHECK-NEXT: addvl sp, sp, #-18 @@ -449,24 +459,22 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d9 @ cfa - 16 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d10 @ cfa - 24 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d11 @ cfa - 32 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d12 @ cfa - 40 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * VG - 48 -; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * VG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 
0x60, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d9 @ cfa - 16 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d10 @ cfa - 24 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d11 @ cfa - 32 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d12 @ cfa - 40 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * IncomingVG - 48 +; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * IncomingVG - 48 ; FP-CHECK-NEXT: addvl sp, sp, #-1 ; FP-CHECK-NEXT: str z0, [x29, #-19, mul vl] // 16-byte Folded Spill ; FP-CHECK-NEXT: //APP ; FP-CHECK-NEXT: //NO_APP -; FP-CHECK-NEXT: .cfi_offset vg, -32 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: ldr z0, [x29, #-19, mul vl] // 16-byte Folded Reload ; FP-CHECK-NEXT: bl scalable_callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: addvl sp, sp, #1 ; FP-CHECK-NEXT: ptrue pn8.b ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload @@ -505,6 +513,7 @@ define void @vg_unwind_with_sve_args( %x) #0 { ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 ; FP-CHECK-NEXT: .cfi_restore w27 ; FP-CHECK-NEXT: .cfi_restore w28 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: ret @@ -530,7 +539,9 @@ define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w28, -8 +; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 @@ -553,19 +564,20 @@ define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; CHECK-NEXT: .cfi_def_cfa_register wsp ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: str x8, [x0] -; CHECK-NEXT: .cfi_offset vg, -16 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 @@ -591,6 +603,7 @@ define void 
@vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset w28, -8 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -611,11 +624,9 @@ define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; FP-CHECK-NEXT: // %bb.2: // %entry ; FP-CHECK-NEXT: mov x8, sp ; FP-CHECK-NEXT: str x8, [x0] -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: bl callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: add sp, sp, #80, lsl #12 // =327680 ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload @@ -626,6 +637,7 @@ define void @vg_unwind_multiple_scratch_regs(ptr %out) #1 { ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 ; FP-CHECK-NEXT: .cfi_restore w28 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -648,24 +660,20 @@ entry: ret void } -; Locally streaming functions require storing both the streaming and -; non-streaming values of VG. -; define void @vg_locally_streaming_fn() #3 { ; CHECK-LABEL: vg_locally_streaming_fn: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: lsr x9, x9, #3 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset vg, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -676,18 +684,18 @@ define void @vg_locally_streaming_fn() #3 { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: bl streaming_callee -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -702,18 +710,15 @@ define void @vg_locally_streaming_fn() #3 { ; FP-CHECK: // %bb.0: ; FP-CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill ; FP-CHECK-NEXT: .cfi_def_cfa_offset 96 -; FP-CHECK-NEXT: rdsvl x9, #1 +; FP-CHECK-NEXT: cntd x9 ; FP-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; FP-CHECK-NEXT: lsr x9, x9, #3 ; FP-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; FP-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill -; FP-CHECK-NEXT: cntd x9 ; FP-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; FP-CHECK-NEXT: str x9, [sp, #88] // 8-byte Folded Spill +; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 -; FP-CHECK-NEXT: .cfi_offset vg, -8 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -726,9 +731,7 @@ define void @vg_locally_streaming_fn() #3 { ; FP-CHECK-NEXT: .cfi_offset b15, -96 ; FP-CHECK-NEXT: bl callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: bl streaming_callee -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: bl callee ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 @@ -738,6 +741,7 @@ define void @vg_locally_streaming_fn() #3 { ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -768,10 +772,12 @@ define void @streaming_compatible_to_streaming() #4 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -782,7 +788,6 @@ define void @streaming_compatible_to_streaming() #4 { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: tbnz w19, #0, .LBB6_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm @@ -792,16 +797,17 @@ define void @streaming_compatible_to_streaming() #4 { ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB6_4: -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: 
.cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -825,6 +831,7 @@ define void @streaming_compatible_to_streaming() #4 { ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset w19, -8 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -837,7 +844,6 @@ define void @streaming_compatible_to_streaming() #4 { ; FP-CHECK-NEXT: .cfi_offset b15, -96 ; FP-CHECK-NEXT: bl __arm_sme_state ; FP-CHECK-NEXT: mov x19, x0 -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: tbnz w19, #0, .LBB6_2 ; FP-CHECK-NEXT: // %bb.1: ; FP-CHECK-NEXT: smstart sm @@ -847,7 +853,6 @@ define void @streaming_compatible_to_streaming() #4 { ; FP-CHECK-NEXT: // %bb.3: ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: .LBB6_4: -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload @@ -857,6 +862,7 @@ define void @streaming_compatible_to_streaming() #4 { ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 ; FP-CHECK-NEXT: .cfi_restore w19 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -885,10 +891,12 @@ define void @streaming_compatible_to_non_streaming() #4 { ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset w19, -16 -; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: .cfi_offset b8, -40 ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: .cfi_offset b10, -56 @@ -899,7 +907,6 @@ define void @streaming_compatible_to_non_streaming() #4 { ; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: tbz w19, #0, .LBB7_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm @@ -909,16 +916,17 @@ define void @streaming_compatible_to_non_streaming() #4 { ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB7_4: -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -942,6 +950,7 @@ define void @streaming_compatible_to_non_streaming() #4 { ; FP-CHECK-NEXT: 
add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 ; FP-CHECK-NEXT: .cfi_offset w19, -8 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -954,7 +963,6 @@ define void @streaming_compatible_to_non_streaming() #4 { ; FP-CHECK-NEXT: .cfi_offset b15, -96 ; FP-CHECK-NEXT: bl __arm_sme_state ; FP-CHECK-NEXT: mov x19, x0 -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: tbz w19, #0, .LBB7_2 ; FP-CHECK-NEXT: // %bb.1: ; FP-CHECK-NEXT: smstop sm @@ -964,7 +972,6 @@ define void @streaming_compatible_to_non_streaming() #4 { ; FP-CHECK-NEXT: // %bb.3: ; FP-CHECK-NEXT: smstart sm ; FP-CHECK-NEXT: .LBB7_4: -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload @@ -974,6 +981,7 @@ define void @streaming_compatible_to_non_streaming() #4 { ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 ; FP-CHECK-NEXT: .cfi_restore w19 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -1014,6 +1022,7 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ; NO-SVE-CHECK-NEXT: add x29, sp, #64 ; NO-SVE-CHECK-NEXT: .cfi_def_cfa w29, 32 ; NO-SVE-CHECK-NEXT: .cfi_offset w19, -8 +; NO-SVE-CHECK-NEXT: .cfi_offset vg, -16 ; NO-SVE-CHECK-NEXT: .cfi_offset w30, -24 ; NO-SVE-CHECK-NEXT: .cfi_offset w29, -32 ; NO-SVE-CHECK-NEXT: .cfi_offset b8, -40 @@ -1027,7 +1036,6 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ; NO-SVE-CHECK-NEXT: mov w8, w0 ; NO-SVE-CHECK-NEXT: bl __arm_sme_state ; NO-SVE-CHECK-NEXT: mov x19, x0 -; NO-SVE-CHECK-NEXT: .cfi_offset vg, -16 ; NO-SVE-CHECK-NEXT: tbnz w19, #0, .LBB8_2 ; NO-SVE-CHECK-NEXT: // %bb.1: ; NO-SVE-CHECK-NEXT: smstart sm @@ -1038,7 +1046,6 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ; NO-SVE-CHECK-NEXT: // %bb.3: ; NO-SVE-CHECK-NEXT: smstop sm ; NO-SVE-CHECK-NEXT: .LBB8_4: -; NO-SVE-CHECK-NEXT: .cfi_restore vg ; NO-SVE-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; NO-SVE-CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload @@ -1048,6 +1055,7 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; NO-SVE-CHECK-NEXT: .cfi_def_cfa_offset 0 ; NO-SVE-CHECK-NEXT: .cfi_restore w19 +; NO-SVE-CHECK-NEXT: .cfi_restore vg ; NO-SVE-CHECK-NEXT: .cfi_restore w30 ; NO-SVE-CHECK-NEXT: .cfi_restore w29 ; NO-SVE-CHECK-NEXT: .cfi_restore b8 @@ -1073,30 +1081,29 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ; user-code as if it is part of the frame-setup when doing so. define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind { ; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue: -; NO-SVE-CHECK: // %bb.0: -; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill -; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; NO-SVE-CHECK-NEXT: mov x9, x0 -; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg -; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill -; NO-SVE-CHECK-NEXT: mov x0, x9 -; NO-SVE-CHECK-NEXT: rdsvl x8, #1 -; NO-SVE-CHECK-NEXT: add x29, sp, #64 -; NO-SVE-CHECK-NEXT: lsr x8, x8, #3 -; NO-SVE-CHECK-NEXT: mov x1, x0 -; NO-SVE-CHECK-NEXT: smstart sm -; NO-SVE-CHECK-NEXT: mov x0, x8 -; NO-SVE-CHECK-NEXT: bl bar -; NO-SVE-CHECK-NEXT: smstop sm -; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload -; NO-SVE-CHECK-NEXT: ret +; NO-SVE-CHECK: // %bb.0: +; NO-SVE-CHECK-NEXT: sub sp, sp, #96 +; NO-SVE-CHECK-NEXT: rdsvl x8, #1 +; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x1, x0 +; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: lsr x8, x8, #3 +; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #80] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: add x29, sp, #80 +; NO-SVE-CHECK-NEXT: smstart sm +; NO-SVE-CHECK-NEXT: mov x0, x8 +; NO-SVE-CHECK-NEXT: bl bar +; NO-SVE-CHECK-NEXT: smstop sm +; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #80] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: add sp, sp, #96 +; NO-SVE-CHECK-NEXT: ret +; %some_alloc = alloca i64, align 8 %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd() call void @bar(i64 %rdsvl, i64 %x0) @@ -1111,34 +1118,37 @@ declare void @bar(i64, i64) "aarch64_pstate_sm_enabled" define void @vg_unwind_noasync() #5 { ; CHECK-LABEL: vg_unwind_noasync: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_offset b8, -24 -; CHECK-NEXT: .cfi_offset b9, -32 -; CHECK-NEXT: .cfi_offset b10, -40 -; CHECK-NEXT: .cfi_offset b11, -48 -; CHECK-NEXT: .cfi_offset b12, -56 -; CHECK-NEXT: .cfi_offset b13, -64 -; CHECK-NEXT: .cfi_offset b14, -72 -; CHECK-NEXT: .cfi_offset b15, -80 -; CHECK-NEXT: .cfi_offset vg, -8 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore vg ; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 @@ -1161,6 +1171,7 @@ define void @vg_unwind_noasync() #5 { ; FP-CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill ; FP-CHECK-NEXT: add x29, sp, #64 ; FP-CHECK-NEXT: .cfi_def_cfa w29, 32 +; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: .cfi_offset w30, -24 ; FP-CHECK-NEXT: .cfi_offset w29, -32 ; FP-CHECK-NEXT: .cfi_offset b8, -40 @@ -1171,11 +1182,9 @@ define void @vg_unwind_noasync() #5 { ; FP-CHECK-NEXT: .cfi_offset b13, -80 ; FP-CHECK-NEXT: .cfi_offset b14, -88 ; FP-CHECK-NEXT: .cfi_offset b15, -96 -; FP-CHECK-NEXT: .cfi_offset vg, -16 ; FP-CHECK-NEXT: smstop sm ; FP-CHECK-NEXT: bl callee ; FP-CHECK-NEXT: smstart sm -; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_def_cfa wsp, 96 ; FP-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload @@ -1183,6 +1192,7 @@ define void @vg_unwind_noasync() #5 { ; FP-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; FP-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; FP-CHECK-NEXT: .cfi_def_cfa_offset 0 +; FP-CHECK-NEXT: .cfi_restore vg ; FP-CHECK-NEXT: .cfi_restore w30 ; FP-CHECK-NEXT: .cfi_restore w29 ; FP-CHECK-NEXT: .cfi_restore b8 @@ -1194,6 +1204,7 @@ define void @vg_unwind_noasync() #5 { ; FP-CHECK-NEXT: .cfi_restore b14 ; FP-CHECK-NEXT: .cfi_restore b15 ; FP-CHECK-NEXT: ret +; ; OUTLINER-CHECK-LABEL: vg_unwind_noasync: ; OUTLINER-CHECK-NOT: OUTLINED_FUNCTION_ ; diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll index 
c67d91952c618..1de8d0a080b70 100644 --- a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll +++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll @@ -72,12 +72,12 @@ entry: ; mitigated with the -aarch64-enable-zpr-predicate-spills option. define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, i16 %P4) #2 { -; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale] -; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] -; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale] +; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-64-258 * vscale] is too close to FPR stack object at [SP-64-256 * vscale] +; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-64-16 * vscale] is too close to GPR stack object at [SP-64] +; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1088-258 * vscale] is too close to FPR stack object at [SP-1088-256 * vscale] ; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call': ; CHECK-ZPR-PRED-SPILLS-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object -; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] +; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-64-16 * vscale] is too close to GPR stack object at [SP-64] ; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object ; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at {{.*}} is too close to GPR stack object entry: @@ -87,12 +87,12 @@ entry: } define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, i16 %P4) #2 { -; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale] -; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] -; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale] +; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-64-258 * vscale] is too close to FPR stack object at [SP-64-256 * vscale] +; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-64-16 * vscale] is too close to GPR stack object at [SP-64] +; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1088-258 * vscale] is too close to FPR stack object at [SP-1088-256 * vscale] ; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': ; CHECK-ZPR-PRED-SPILLS-NOT: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object -; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48] +; CHECK-ZPR-PRED-SPILLS: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-64-16 * vscale] is too close to GPR stack object at
[SP-64] ; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: :0:0: stack hazard in 'svecc_alloca_call': PPR stack object at {{.*}} is too close to FPR stack object ; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: :0:0: stack hazard in 'svecc_alloca_call': FPR stack object at {{.*}} is too close to GPR stack object entry: diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index e5849f32f67d2..c878d888b5f03 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -616,16 +616,13 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK0: // %bb.0: // %entry ; CHECK0-NEXT: sub sp, sp, #176 ; CHECK0-NEXT: .cfi_def_cfa_offset 176 -; CHECK0-NEXT: rdsvl x9, #1 -; CHECK0-NEXT: stp d15, d14, [sp, #48] // 16-byte Folded Spill -; CHECK0-NEXT: lsr x9, x9, #3 -; CHECK0-NEXT: stp d13, d12, [sp, #64] // 16-byte Folded Spill -; CHECK0-NEXT: stp d11, d10, [sp, #80] // 16-byte Folded Spill -; CHECK0-NEXT: str x9, [sp, #32] // 8-byte Folded Spill ; CHECK0-NEXT: cntd x9 -; CHECK0-NEXT: str x9, [sp, #40] // 8-byte Folded Spill -; CHECK0-NEXT: stp d9, d8, [sp, #96] // 16-byte Folded Spill -; CHECK0-NEXT: str x25, [sp, #112] // 8-byte Folded Spill +; CHECK0-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill +; CHECK0-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill +; CHECK0-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill +; CHECK0-NEXT: stp x9, x25, [sp, #112] // 16-byte Folded Spill ; CHECK0-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill ; CHECK0-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill ; CHECK0-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill @@ -635,16 +632,18 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK0-NEXT: .cfi_offset w22, -32 ; CHECK0-NEXT: .cfi_offset w23, -40 ; CHECK0-NEXT: .cfi_offset w24, -48 -; CHECK0-NEXT: .cfi_offset w25, -64 -; CHECK0-NEXT: .cfi_offset b8, -72 -; CHECK0-NEXT: .cfi_offset b9, -80 -; CHECK0-NEXT: .cfi_offset b10, -88 -; CHECK0-NEXT: .cfi_offset b11, -96 -; CHECK0-NEXT: .cfi_offset b12, -104 -; CHECK0-NEXT: .cfi_offset b13, -112 -; CHECK0-NEXT: .cfi_offset b14, -120 -; CHECK0-NEXT: .cfi_offset b15, -128 -; CHECK0-NEXT: .cfi_offset vg, -136 +; CHECK0-NEXT: .cfi_offset w25, -56 +; CHECK0-NEXT: .cfi_offset vg, -64 +; CHECK0-NEXT: .cfi_offset w30, -72 +; CHECK0-NEXT: .cfi_offset w29, -80 +; CHECK0-NEXT: .cfi_offset b8, -88 +; CHECK0-NEXT: .cfi_offset b9, -96 +; CHECK0-NEXT: .cfi_offset b10, -104 +; CHECK0-NEXT: .cfi_offset b11, -112 +; CHECK0-NEXT: .cfi_offset b12, -120 +; CHECK0-NEXT: .cfi_offset b13, -128 +; CHECK0-NEXT: .cfi_offset b14, -136 +; CHECK0-NEXT: .cfi_offset b15, -144 ; CHECK0-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK0-NEXT: smstart sm ; CHECK0-NEXT: //APP @@ -658,12 +657,13 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK0-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK0-NEXT: mov w0, wzr ; CHECK0-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload -; CHECK0-NEXT: ldr x25, [sp, #112] // 8-byte Folded Reload +; CHECK0-NEXT: ldr x25, [sp, #120] // 8-byte Folded Reload ; CHECK0-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload -; CHECK0-NEXT: ldp d9, d8, [sp, #96] // 16-byte Folded Reload -; CHECK0-NEXT: ldp d11, d10, [sp, #80] // 16-byte Folded Reload -; CHECK0-NEXT: ldp 
d13, d12, [sp, #64] // 16-byte Folded Reload -; CHECK0-NEXT: ldp d15, d14, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload +; CHECK0-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK0-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK0-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK0-NEXT: add sp, sp, #176 ; CHECK0-NEXT: .cfi_def_cfa_offset 0 ; CHECK0-NEXT: .cfi_restore w19 @@ -673,6 +673,9 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK0-NEXT: .cfi_restore w23 ; CHECK0-NEXT: .cfi_restore w24 ; CHECK0-NEXT: .cfi_restore w25 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 ; CHECK0-NEXT: .cfi_restore b8 ; CHECK0-NEXT: .cfi_restore b9 ; CHECK0-NEXT: .cfi_restore b10 @@ -687,16 +690,13 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK64: // %bb.0: // %entry ; CHECK64-NEXT: sub sp, sp, #304 ; CHECK64-NEXT: .cfi_def_cfa_offset 304 -; CHECK64-NEXT: rdsvl x9, #1 -; CHECK64-NEXT: stp d15, d14, [sp, #112] // 16-byte Folded Spill -; CHECK64-NEXT: lsr x9, x9, #3 -; CHECK64-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill -; CHECK64-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill -; CHECK64-NEXT: str x9, [sp, #96] // 8-byte Folded Spill ; CHECK64-NEXT: cntd x9 -; CHECK64-NEXT: str x9, [sp, #104] // 8-byte Folded Spill -; CHECK64-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill -; CHECK64-NEXT: stp x29, x25, [sp, #240] // 16-byte Folded Spill +; CHECK64-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill +; CHECK64-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill +; CHECK64-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill +; CHECK64-NEXT: stp x29, x30, [sp, #224] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x25, [sp, #240] // 16-byte Folded Spill ; CHECK64-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill ; CHECK64-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill ; CHECK64-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill @@ -707,16 +707,17 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK64-NEXT: .cfi_offset w23, -40 ; CHECK64-NEXT: .cfi_offset w24, -48 ; CHECK64-NEXT: .cfi_offset w25, -56 -; CHECK64-NEXT: .cfi_offset w29, -64 -; CHECK64-NEXT: .cfi_offset b8, -136 -; CHECK64-NEXT: .cfi_offset b9, -144 -; CHECK64-NEXT: .cfi_offset b10, -152 -; CHECK64-NEXT: .cfi_offset b11, -160 -; CHECK64-NEXT: .cfi_offset b12, -168 -; CHECK64-NEXT: .cfi_offset b13, -176 -; CHECK64-NEXT: .cfi_offset b14, -184 -; CHECK64-NEXT: .cfi_offset b15, -192 -; CHECK64-NEXT: .cfi_offset vg, -200 +; CHECK64-NEXT: .cfi_offset vg, -64 +; CHECK64-NEXT: .cfi_offset w30, -72 +; CHECK64-NEXT: .cfi_offset w29, -80 +; CHECK64-NEXT: .cfi_offset b8, -152 +; CHECK64-NEXT: .cfi_offset b9, -160 +; CHECK64-NEXT: .cfi_offset b10, -168 +; CHECK64-NEXT: .cfi_offset b11, -176 +; CHECK64-NEXT: .cfi_offset b12, -184 +; CHECK64-NEXT: .cfi_offset b13, -192 +; CHECK64-NEXT: .cfi_offset b14, -200 +; CHECK64-NEXT: .cfi_offset b15, -208 ; CHECK64-NEXT: str d0, [sp, #80] // 8-byte Folded Spill ; CHECK64-NEXT: smstart sm ; CHECK64-NEXT: //APP @@ -730,12 +731,13 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK64-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload ; 
CHECK64-NEXT: mov w0, wzr ; CHECK64-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x25, [sp, #248] // 8-byte Folded Reload ; CHECK64-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload -; CHECK64-NEXT: ldp x29, x25, [sp, #240] // 16-byte Folded Reload -; CHECK64-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload -; CHECK64-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload -; CHECK64-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload -; CHECK64-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #224] // 16-byte Folded Reload +; CHECK64-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload +; CHECK64-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload +; CHECK64-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload +; CHECK64-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload ; CHECK64-NEXT: add sp, sp, #304 ; CHECK64-NEXT: .cfi_def_cfa_offset 0 ; CHECK64-NEXT: .cfi_restore w19 @@ -745,6 +747,8 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK64-NEXT: .cfi_restore w23 ; CHECK64-NEXT: .cfi_restore w24 ; CHECK64-NEXT: .cfi_restore w25 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: .cfi_restore w30 ; CHECK64-NEXT: .cfi_restore w29 ; CHECK64-NEXT: .cfi_restore b8 ; CHECK64-NEXT: .cfi_restore b9 @@ -758,18 +762,16 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; ; CHECK1024-LABEL: csr_x18_25_d8_15_allocdi64_locallystreaming: ; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: rdsvl x9, #1 -; CHECK1024-NEXT: lsr x9, x9, #3 ; CHECK1024-NEXT: sub sp, sp, #1168 ; CHECK1024-NEXT: .cfi_def_cfa_offset 1168 -; CHECK1024-NEXT: str x9, [sp] // 8-byte Folded Spill ; CHECK1024-NEXT: cntd x9 -; CHECK1024-NEXT: str x9, [sp, #8] // 8-byte Folded Spill -; CHECK1024-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK1024-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK1024-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK1024-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK1024-NEXT: str x29, [sp, #1104] // 8-byte Folded Spill +; CHECK1024-NEXT: stp d15, d14, [sp] // 16-byte Folded Spill +; CHECK1024-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK1024-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK1024-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK1024-NEXT: str x29, [sp, #1088] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1096] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1104] // 8-byte Folded Spill ; CHECK1024-NEXT: str x25, [sp, #1112] // 8-byte Folded Spill ; CHECK1024-NEXT: str x24, [sp, #1120] // 8-byte Folded Spill ; CHECK1024-NEXT: str x23, [sp, #1128] // 8-byte Folded Spill @@ -784,16 +786,17 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK1024-NEXT: .cfi_offset w23, -40 ; CHECK1024-NEXT: .cfi_offset w24, -48 ; CHECK1024-NEXT: .cfi_offset w25, -56 -; CHECK1024-NEXT: .cfi_offset w29, -64 -; CHECK1024-NEXT: .cfi_offset b8, -1096 -; CHECK1024-NEXT: .cfi_offset b9, -1104 -; CHECK1024-NEXT: .cfi_offset b10, -1112 -; CHECK1024-NEXT: .cfi_offset b11, -1120 -; CHECK1024-NEXT: .cfi_offset b12, -1128 -; CHECK1024-NEXT: .cfi_offset b13, -1136 -; CHECK1024-NEXT: .cfi_offset b14, -1144 -; CHECK1024-NEXT: .cfi_offset b15, -1152 -; CHECK1024-NEXT: .cfi_offset vg, -1160 +; CHECK1024-NEXT: .cfi_offset vg, -64 +; CHECK1024-NEXT: .cfi_offset w30, -72 +; CHECK1024-NEXT: .cfi_offset w29, 
-80 +; CHECK1024-NEXT: .cfi_offset b8, -1112 +; CHECK1024-NEXT: .cfi_offset b9, -1120 +; CHECK1024-NEXT: .cfi_offset b10, -1128 +; CHECK1024-NEXT: .cfi_offset b11, -1136 +; CHECK1024-NEXT: .cfi_offset b12, -1144 +; CHECK1024-NEXT: .cfi_offset b13, -1152 +; CHECK1024-NEXT: .cfi_offset b14, -1160 +; CHECK1024-NEXT: .cfi_offset b15, -1168 ; CHECK1024-NEXT: sub sp, sp, #1056 ; CHECK1024-NEXT: .cfi_def_cfa_offset 2224 ; CHECK1024-NEXT: str d0, [sp, #1040] // 8-byte Folded Spill @@ -809,18 +812,19 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK1024-NEXT: mov w0, wzr ; CHECK1024-NEXT: add sp, sp, #1056 ; CHECK1024-NEXT: .cfi_def_cfa_offset 1168 -; CHECK1024-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK1024-NEXT: ldr x19, [sp, #1160] // 8-byte Folded Reload -; CHECK1024-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK1024-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK1024-NEXT: ldr x20, [sp, #1152] // 8-byte Folded Reload ; CHECK1024-NEXT: ldr x21, [sp, #1144] // 8-byte Folded Reload ; CHECK1024-NEXT: ldr x22, [sp, #1136] // 8-byte Folded Reload ; CHECK1024-NEXT: ldr x23, [sp, #1128] // 8-byte Folded Reload ; CHECK1024-NEXT: ldr x24, [sp, #1120] // 8-byte Folded Reload ; CHECK1024-NEXT: ldr x25, [sp, #1112] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1104] // 8-byte Folded Reload -; CHECK1024-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK1024-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1096] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1088] // 8-byte Folded Reload +; CHECK1024-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK1024-NEXT: ldp d15, d14, [sp] // 16-byte Folded Reload ; CHECK1024-NEXT: add sp, sp, #1168 ; CHECK1024-NEXT: .cfi_def_cfa_offset 0 ; CHECK1024-NEXT: .cfi_restore w19 @@ -830,6 +834,8 @@ define i32 @csr_x18_25_d8_15_allocdi64_locallystreaming(i64 %d, double %e) "aarc ; CHECK1024-NEXT: .cfi_restore w23 ; CHECK1024-NEXT: .cfi_restore w24 ; CHECK1024-NEXT: .cfi_restore w25 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: .cfi_restore w30 ; CHECK1024-NEXT: .cfi_restore w29 ; CHECK1024-NEXT: .cfi_restore b8 ; CHECK1024-NEXT: .cfi_restore b9 @@ -1570,36 +1576,38 @@ define [2 x ] @sve_signature_pred_2xv4i1_caller([2 x %P0, ptr %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { ; CHECK0-LABEL: svecc_call: ; CHECK0: // %bb.0: // %entry -; CHECK0-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill -; CHECK0-NEXT: .cfi_def_cfa_offset 48 +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 ; CHECK0-NEXT: cntd x9 -; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: stp x27, x19, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 ; CHECK0-NEXT: .cfi_offset w19, -8 -; CHECK0-NEXT: .cfi_offset w27, -16 -; CHECK0-NEXT: .cfi_offset w28, -24 -; CHECK0-NEXT: .cfi_offset w30, -40 -; CHECK0-NEXT: .cfi_offset w29, -48 +; CHECK0-NEXT: .cfi_offset w26, -16 +; CHECK0-NEXT: .cfi_offset w27, -24 +; CHECK0-NEXT: .cfi_offset w28, -32 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 ; CHECK0-NEXT: addvl sp, sp, #-18 -; CHECK0-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x30, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 48 + 144 * VG ; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill @@ -1892,20 +1907,19 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d9 @ cfa - 16 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d10 @ cfa - 24 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d11 @ cfa - 32 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d12 @ cfa - 40 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * VG - 48 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * VG - 48 +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d8 @ cfa - 8 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d9 @ cfa - 16 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d10 @ cfa - 24 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d11 @ cfa - 32 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d12 @ cfa - 40 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d13 @ cfa - 48 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x50, 0x22, 
0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d14 @ cfa - 56 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d15 @ cfa - 64 * IncomingVG - 64 ; CHECK0-NEXT: mov x8, x0 ; CHECK0-NEXT: bl __arm_sme_state ; CHECK0-NEXT: mov x19, x0 ; CHECK0-NEXT: //APP ; CHECK0-NEXT: //NO_APP -; CHECK0-NEXT: .cfi_offset vg, -32 ; CHECK0-NEXT: tbz w19, #0, .LBB28_2 ; CHECK0-NEXT: // %bb.1: // %entry ; CHECK0-NEXT: smstop sm @@ -1918,13 +1932,12 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: // %bb.3: // %entry ; CHECK0-NEXT: smstart sm ; CHECK0-NEXT: .LBB28_4: // %entry -; CHECK0-NEXT: mov w0, #22647 // =0x5877 -; CHECK0-NEXT: movk w0, #59491, lsl #16 -; CHECK0-NEXT: .cfi_restore vg ; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: mov w0, #22647 // =0x5877 ; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: movk w0, #59491, lsl #16 ; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload ; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload @@ -1950,7 +1963,6 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK0-NEXT: addvl sp, sp, #18 -; CHECK0-NEXT: .cfi_def_cfa wsp, 48 ; CHECK0-NEXT: .cfi_restore z8 ; CHECK0-NEXT: .cfi_restore z9 ; CHECK0-NEXT: .cfi_restore z10 @@ -1959,32 +1971,39 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK0-NEXT: .cfi_restore z13 ; CHECK0-NEXT: .cfi_restore z14 ; CHECK0-NEXT: .cfi_restore z15 -; CHECK0-NEXT: ldp x27, x19, [sp, #32] // 16-byte Folded Reload -; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload -; CHECK0-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload ; CHECK0-NEXT: .cfi_def_cfa_offset 0 ; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w26 ; CHECK0-NEXT: .cfi_restore w27 ; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore vg ; CHECK0-NEXT: .cfi_restore w30 ; CHECK0-NEXT: .cfi_restore w29 ; CHECK0-NEXT: ret ; ; CHECK64-LABEL: svecc_call: ; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #112 -; CHECK64-NEXT: .cfi_def_cfa_offset 112 +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 ; CHECK64-NEXT: cntd x9 ; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill -; CHECK64-NEXT: stp x27, x19, [sp, #96] // 16-byte Folded Spill -; CHECK64-NEXT: .cfi_offset w19, -8 -; CHECK64-NEXT: .cfi_offset w27, -16 -; CHECK64-NEXT: .cfi_offset w28, -24 -; CHECK64-NEXT: .cfi_offset w30, -40 -; CHECK64-NEXT: .cfi_offset w29, -48 +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: 
.cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 ; CHECK64-NEXT: addvl sp, sp, #-18 -; CHECK64-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xf0, 0x00, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 112 + 144 * VG ; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill @@ -2013,22 +2032,20 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d8 @ cfa - 8 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d9 @ cfa - 16 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d10 @ cfa - 24 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d11 @ cfa - 32 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d12 @ cfa - 40 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d13 @ cfa - 48 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d14 @ cfa - 56 * VG - 112 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x90, 0x7f, 0x22 // $d15 @ cfa - 64 * VG - 112 +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d8 @ cfa - 8 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d9 @ cfa - 16 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d10 @ cfa - 24 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d11 @ cfa - 32 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d12 @ cfa - 40 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d13 @ cfa - 48 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d14 @ cfa - 56 * IncomingVG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d15 @ cfa - 64 * IncomingVG - 128 ; CHECK64-NEXT: sub sp, sp, #64 -; CHECK64-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x01, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 176 + 144 * VG ; CHECK64-NEXT: mov x8, x0 ; CHECK64-NEXT: bl __arm_sme_state ; CHECK64-NEXT: mov x19, x0 ; CHECK64-NEXT: //APP ; CHECK64-NEXT: //NO_APP -; CHECK64-NEXT: .cfi_offset vg, -32 ; CHECK64-NEXT: 
tbz w19, #0, .LBB28_2 ; CHECK64-NEXT: // %bb.1: // %entry ; CHECK64-NEXT: smstop sm @@ -2043,9 +2060,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: .LBB28_4: // %entry ; CHECK64-NEXT: mov w0, #22647 // =0x5877 ; CHECK64-NEXT: movk w0, #59491, lsl #16 -; CHECK64-NEXT: .cfi_restore vg ; CHECK64-NEXT: add sp, sp, #64 -; CHECK64-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xf0, 0x00, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 112 + 144 * VG ; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload ; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload @@ -2075,7 +2090,6 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK64-NEXT: addvl sp, sp, #18 -; CHECK64-NEXT: .cfi_def_cfa wsp, 112 ; CHECK64-NEXT: .cfi_restore z8 ; CHECK64-NEXT: .cfi_restore z9 ; CHECK64-NEXT: .cfi_restore z10 @@ -2084,36 +2098,43 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK64-NEXT: .cfi_restore z13 ; CHECK64-NEXT: .cfi_restore z14 ; CHECK64-NEXT: .cfi_restore z15 -; CHECK64-NEXT: ldp x27, x19, [sp, #96] // 16-byte Folded Reload -; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload ; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #112 +; CHECK64-NEXT: add sp, sp, #128 ; CHECK64-NEXT: .cfi_def_cfa_offset 0 ; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w26 ; CHECK64-NEXT: .cfi_restore w27 ; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore vg ; CHECK64-NEXT: .cfi_restore w30 ; CHECK64-NEXT: .cfi_restore w29 ; CHECK64-NEXT: ret ; ; CHECK1024-LABEL: svecc_call: ; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1072 -; CHECK1024-NEXT: .cfi_def_cfa_offset 1072 +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 ; CHECK1024-NEXT: cntd x9 ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill ; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill ; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill ; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill ; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill -; CHECK1024-NEXT: str x19, [sp, #1064] // 8-byte Folded Spill -; CHECK1024-NEXT: .cfi_offset w19, -8 -; CHECK1024-NEXT: .cfi_offset w27, -16 -; CHECK1024-NEXT: .cfi_offset w28, -24 -; CHECK1024-NEXT: .cfi_offset w30, -40 -; CHECK1024-NEXT: .cfi_offset w29, -48 +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 ; CHECK1024-NEXT: addvl sp, sp, #-18 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 1072 + 144 * VG ; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK1024-NEXT: str p14, [sp, #5, mul 
vl] // 2-byte Folded Spill ; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill @@ -2142,22 +2163,20 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill ; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d10 @ cfa - 24 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d11 @ cfa - 32 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d12 @ cfa - 40 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d13 @ cfa - 48 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d14 @ cfa - 56 * VG - 1072 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xd0, 0x77, 0x22 // $d15 @ cfa - 64 * VG - 1072 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088 ; CHECK1024-NEXT: sub sp, sp, #1024 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 2096 + 144 * VG ; CHECK1024-NEXT: mov x8, x0 ; CHECK1024-NEXT: bl __arm_sme_state ; CHECK1024-NEXT: mov x19, x0 ; CHECK1024-NEXT: //APP ; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: .cfi_offset vg, -32 ; CHECK1024-NEXT: tbz w19, #0, .LBB28_2 ; CHECK1024-NEXT: // %bb.1: // %entry ; CHECK1024-NEXT: smstop sm @@ -2172,9 +2191,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3, ; CHECK1024-NEXT: .LBB28_4: // %entry ; CHECK1024-NEXT: mov w0, #22647 // =0x5877 ; CHECK1024-NEXT: movk w0, #59491, lsl #16 -; CHECK1024-NEXT: 
 ; CHECK1024-NEXT: add sp, sp, #1024
-; CHECK1024-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 1072 + 144 * VG
 ; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
 ; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
@@ -2204,7 +2221,6 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3,
 ; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
 ; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; CHECK1024-NEXT: addvl sp, sp, #18
-; CHECK1024-NEXT: .cfi_def_cfa wsp, 1072
 ; CHECK1024-NEXT: .cfi_restore z8
 ; CHECK1024-NEXT: .cfi_restore z9
 ; CHECK1024-NEXT: .cfi_restore z10
@@ -2213,16 +2229,20 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3,
 ; CHECK1024-NEXT: .cfi_restore z13
 ; CHECK1024-NEXT: .cfi_restore z14
 ; CHECK1024-NEXT: .cfi_restore z15
-; CHECK1024-NEXT: ldr x19, [sp, #1064] // 8-byte Folded Reload
+; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088
+; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload
 ; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload
 ; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload
 ; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
 ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
-; CHECK1024-NEXT: add sp, sp, #1072
+; CHECK1024-NEXT: add sp, sp, #1088
 ; CHECK1024-NEXT: .cfi_def_cfa_offset 0
 ; CHECK1024-NEXT: .cfi_restore w19
+; CHECK1024-NEXT: .cfi_restore w26
 ; CHECK1024-NEXT: .cfi_restore w27
 ; CHECK1024-NEXT: .cfi_restore w28
+; CHECK1024-NEXT: .cfi_restore vg
 ; CHECK1024-NEXT: .cfi_restore w30
 ; CHECK1024-NEXT: .cfi_restore w29
 ; CHECK1024-NEXT: ret
@@ -2235,18 +2255,22 @@ entry:
 define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i1> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
 ; CHECK0-LABEL: svecc_alloca_call:
 ; CHECK0: // %bb.0: // %entry
-; CHECK0-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
-; CHECK0-NEXT: .cfi_def_cfa_offset 48
+; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK0-NEXT: .cfi_def_cfa_offset 64
 ; CHECK0-NEXT: cntd x9
-; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill
-; CHECK0-NEXT: stp x27, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
+; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK0-NEXT: mov x29, sp
+; CHECK0-NEXT: .cfi_def_cfa w29, 64
 ; CHECK0-NEXT: .cfi_offset w19, -8
-; CHECK0-NEXT: .cfi_offset w27, -16
-; CHECK0-NEXT: .cfi_offset w28, -24
-; CHECK0-NEXT: .cfi_offset w30, -40
-; CHECK0-NEXT: .cfi_offset w29, -48
+; CHECK0-NEXT: .cfi_offset w26, -16
+; CHECK0-NEXT: .cfi_offset w27, -24
+; CHECK0-NEXT: .cfi_offset w28, -32
+; CHECK0-NEXT: .cfi_offset vg, -48
+; CHECK0-NEXT: .cfi_offset w30, -56
+; CHECK0-NEXT: .cfi_offset w29, -64
 ; CHECK0-NEXT: addvl sp, sp, #-18
-; CHECK0-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x30, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 48 + 144 * VG
 ; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
@@ -2275,21 +2299,19 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2,
 define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i1> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
 ; CHECK-COMMON-LABEL: svecc_call:
 ; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: .cfi_def_cfa_offset 48
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: .cfi_def_cfa_offset 64
 ; CHECK-COMMON-NEXT: cntd x9
-; CHECK-COMMON-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: stp x27, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
+; CHECK-COMMON-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: .cfi_def_cfa w29, 64
 ; CHECK-COMMON-NEXT: .cfi_offset w19, -8
-; CHECK-COMMON-NEXT: .cfi_offset w27, -16
-; CHECK-COMMON-NEXT: .cfi_offset w28, -24
-; CHECK-COMMON-NEXT: .cfi_offset w30, -40
-; CHECK-COMMON-NEXT: .cfi_offset w29, -48
+; CHECK-COMMON-NEXT: .cfi_offset w26, -16
+; CHECK-COMMON-NEXT: .cfi_offset w27, -24
+; CHECK-COMMON-NEXT: .cfi_offset w28, -32
+; CHECK-COMMON-NEXT: .cfi_offset vg, -48
+; CHECK-COMMON-NEXT: .cfi_offset w30, -56
+; CHECK-COMMON-NEXT: .cfi_offset w29, -64
 ; CHECK-COMMON-NEXT: addvl sp, sp, #-18
-; CHECK-COMMON-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x30, 0x92, 0x2e, 0x00, 0x11, 0x90, 0x01, 0x1e, 0x22 // sp + 48 + 144 * VG
 ; CHECK-COMMON-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-COMMON-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-COMMON-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
@@ -414,20 +419,19 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3,
 ; CHECK-COMMON-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
 ; CHECK-COMMON-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d8 @ cfa - 8 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d9 @ cfa - 16 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d10 @ cfa - 24 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d11 @ cfa - 32 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d12 @ cfa - 40 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * VG - 48
-; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * VG - 48
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d8 @ cfa - 8 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d9 @ cfa - 16 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d10 @ cfa - 24 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d11 @ cfa - 32 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d12 @ cfa - 40 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d13 @ cfa - 48 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d14 @ cfa - 56 * IncomingVG - 64
+; CHECK-COMMON-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d15 @ cfa - 64 * IncomingVG - 64
 ; CHECK-COMMON-NEXT: mov x8, x0
 ; CHECK-COMMON-NEXT: bl __arm_sme_state
 ; CHECK-COMMON-NEXT: mov x19, x0
 ; CHECK-COMMON-NEXT: //APP
 ; CHECK-COMMON-NEXT: //NO_APP
-; CHECK-COMMON-NEXT: .cfi_offset vg, -32
 ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB7_2
 ; CHECK-COMMON-NEXT: // %bb.1: // %entry
 ; CHECK-COMMON-NEXT: smstop sm
@@ -440,13 +444,12 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3,
 ; CHECK-COMMON-NEXT: // %bb.3: // %entry
 ; CHECK-COMMON-NEXT: smstart sm
 ; CHECK-COMMON-NEXT: .LBB7_4: // %entry
-; CHECK-COMMON-NEXT: mov w0, #22647 // =0x5877
-; CHECK-COMMON-NEXT: movk w0, #59491, lsl #16
-; CHECK-COMMON-NEXT: .cfi_restore vg
 ; CHECK-COMMON-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-COMMON-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: mov w0, #22647 // =0x5877
 ; CHECK-COMMON-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
 ; CHECK-COMMON-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: movk w0, #59491, lsl #16
 ; CHECK-COMMON-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
 ; CHECK-COMMON-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
 ; CHECK-COMMON-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
@@ -472,7 +475,6 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3,
 ; CHECK-COMMON-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
 ; CHECK-COMMON-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; CHECK-COMMON-NEXT: addvl sp, sp, #18
-; CHECK-COMMON-NEXT: .cfi_def_cfa wsp, 48
 ; CHECK-COMMON-NEXT: .cfi_restore z8
 ; CHECK-COMMON-NEXT: .cfi_restore z9
 ; CHECK-COMMON-NEXT: .cfi_restore z10
@@ -481,13 +483,16 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, %P3,
 ; CHECK-COMMON-NEXT: .cfi_restore z13
 ; CHECK-COMMON-NEXT: .cfi_restore z14
 ; CHECK-COMMON-NEXT: .cfi_restore z15
-; CHECK-COMMON-NEXT: ldp x27, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-COMMON-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: .cfi_def_cfa wsp, 64
+; CHECK-COMMON-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
 ; CHECK-COMMON-NEXT: .cfi_def_cfa_offset 0
 ; CHECK-COMMON-NEXT: .cfi_restore w19
+; CHECK-COMMON-NEXT: .cfi_restore w26
 ; CHECK-COMMON-NEXT: .cfi_restore w27
 ; CHECK-COMMON-NEXT: .cfi_restore w28
+; CHECK-COMMON-NEXT: .cfi_restore vg
 ; CHECK-COMMON-NEXT: .cfi_restore w30
 ; CHECK-COMMON-NEXT: .cfi_restore w29
 ; CHECK-COMMON-NEXT: ret
@@ -534,6 +539,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK-NEXT: .cfi_def_cfa w29, 48
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset vg, -32
 ; CHECK-NEXT: .cfi_offset w30, -40
 ; CHECK-NEXT: .cfi_offset w29, -48
 ; CHECK-NEXT: .cfi_offset b8, -56
@@ -556,11 +562,9 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK-NEXT: stur wzr, [x29, #-68]
 ; CHECK-NEXT: sturh w8, [x29, #-72]
 ; CHECK-NEXT: msr TPIDR2_EL0, x9
-; CHECK-NEXT: .cfi_offset vg, -32
 ; CHECK-NEXT: smstop sm
 ; CHECK-NEXT: bl other
 ; CHECK-NEXT: smstart sm
-; CHECK-NEXT: .cfi_restore vg
 ; CHECK-NEXT: smstart za
 ; CHECK-NEXT: mrs x8, TPIDR2_EL0
 ; CHECK-NEXT: sub x0, x29, #80
@@ -581,6 +585,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK-NEXT: .cfi_def_cfa_offset 0
 ; CHECK-NEXT: .cfi_restore w19
 ; CHECK-NEXT: .cfi_restore w20
+; CHECK-NEXT: .cfi_restore vg
 ; CHECK-NEXT: .cfi_restore w30
 ; CHECK-NEXT: .cfi_restore w29
 ; CHECK-NEXT: .cfi_restore b8
@@ -608,6 +613,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 48
 ; CHECK-NEWLOWERING-NEXT: .cfi_offset w19, -8
 ; CHECK-NEWLOWERING-NEXT: .cfi_offset w20, -16
+; CHECK-NEWLOWERING-NEXT: .cfi_offset vg, -32
 ; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -40
 ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -48
 ; CHECK-NEWLOWERING-NEXT: .cfi_offset b8, -56
@@ -627,13 +633,11 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK-NEWLOWERING-NEXT: mov w20, w0
 ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80]
 ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT: .cfi_offset vg, -32
 ; CHECK-NEWLOWERING-NEXT: smstop sm
 ; CHECK-NEWLOWERING-NEXT: bl other
 ; CHECK-NEWLOWERING-NEXT: smstart sm
 ; CHECK-NEWLOWERING-NEXT: mov w0, w20
 ; CHECK-NEWLOWERING-NEXT: mov w8, w0
-; CHECK-NEWLOWERING-NEXT: .cfi_restore vg
 ; CHECK-NEWLOWERING-NEXT: smstart za
 ; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
 ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #80
@@ -654,6 +658,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
 ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa_offset 0
 ; CHECK-NEWLOWERING-NEXT: .cfi_restore w19
 ; CHECK-NEWLOWERING-NEXT: .cfi_restore w20
+; CHECK-NEWLOWERING-NEXT: .cfi_restore vg
 ; CHECK-NEWLOWERING-NEXT: .cfi_restore w30
 ; CHECK-NEWLOWERING-NEXT: .cfi_restore w29
 ; CHECK-NEWLOWERING-NEXT: .cfi_restore b8