diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index c7fdd7d7c3502..1ba265a60c3d6 100644
--- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -193,31 +193,23 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   case M68k::MOV8dc:
     return TII->ExpandCCR(MIB, /*IsToCCR=*/false);
 
-  case M68k::MOVM8jm_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32jm), /*IsRM=*/false);
   case M68k::MOVM16jm_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32jm), /*IsRM=*/false);
+    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16jm), /*IsRM=*/false);
   case M68k::MOVM32jm_P:
     return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32jm), /*IsRM=*/false);
 
-  case M68k::MOVM8pm_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32pm), /*IsRM=*/false);
   case M68k::MOVM16pm_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32pm), /*IsRM=*/false);
+    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16pm), /*IsRM=*/false);
   case M68k::MOVM32pm_P:
     return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32pm), /*IsRM=*/false);
 
-  case M68k::MOVM8mj_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mj), /*IsRM=*/true);
   case M68k::MOVM16mj_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mj), /*IsRM=*/true);
+    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16mj), /*IsRM=*/true);
   case M68k::MOVM32mj_P:
     return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mj), /*IsRM=*/true);
 
-  case M68k::MOVM8mp_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mp), /*IsRM=*/true);
   case M68k::MOVM16mp_P:
-    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mp), /*IsRM=*/true);
+    return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM16mp), /*IsRM=*/true);
   case M68k::MOVM32mp_P:
     return TII->ExpandMOVEM(MIB, TII->get(M68k::MOVM32mp), /*IsRM=*/true);
 
diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td
index a7d7f1826f7f9..398c55fa6da4a 100644
--- a/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/llvm/lib/Target/M68k/M68kInstrData.td
@@ -337,20 +337,16 @@ class MxMOVEM_RM_Pseudo
     : MxPseudo<(outs TYPE.ROp:$dst), (ins MEMOp:$src)>;
 
 // Mem <- Reg
-def MOVM8jm_P  : MxMOVEM_MR_Pseudo;
 def MOVM16jm_P : MxMOVEM_MR_Pseudo;
 def MOVM32jm_P : MxMOVEM_MR_Pseudo;
 
-def MOVM8pm_P  : MxMOVEM_MR_Pseudo;
 def MOVM16pm_P : MxMOVEM_MR_Pseudo;
 def MOVM32pm_P : MxMOVEM_MR_Pseudo;
 
 // Reg <- Mem
-def MOVM8mj_P  : MxMOVEM_RM_Pseudo;
 def MOVM16mj_P : MxMOVEM_RM_Pseudo;
 def MOVM32mj_P : MxMOVEM_RM_Pseudo;
 
-def MOVM8mp_P  : MxMOVEM_RM_Pseudo;
 def MOVM16mp_P : MxMOVEM_RM_Pseudo;
 def MOVM32mp_P : MxMOVEM_RM_Pseudo;
 
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 182582642c50e..febd020f3f2a5 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -593,7 +593,6 @@ bool M68kInstrInfo::ExpandCCR(MachineInstrBuilder &MIB, bool IsToCCR) const {
 bool M68kInstrInfo::ExpandMOVEM(MachineInstrBuilder &MIB,
                                 const MCInstrDesc &Desc, bool IsRM) const {
   int Reg = 0, Offset = 0, Base = 0;
-  auto XR32 = RI.getRegClass(M68k::XR32RegClassID);
   auto DL = MIB->getDebugLoc();
   auto MI = MIB.getInstr();
   auto &MBB = *MIB->getParent();
@@ -608,13 +607,6 @@ bool M68kInstrInfo::ExpandMOVEM(MachineInstrBuilder &MIB,
     Reg = MIB->getOperand(2).getReg();
   }
 
-  // If the register is not in XR32 then it is smaller than 32 bit, we
-  // implicitly promote it to 32
-  if (!XR32->contains(Reg)) {
-    Reg = RI.getMatchingMegaReg(Reg, XR32);
-    assert(Reg && "Has not meaningful MEGA register");
-  }
-
   unsigned Mask = 1 << RI.getSpillRegisterOrder(Reg);
   if (IsRM) {
     BuildMI(MBB, MI, DL, Desc)
@@ -799,22 +791,25 @@ namespace {
 unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC,
                                const TargetRegisterInfo *TRI,
                                const M68kSubtarget &STI, bool load) {
-  switch (TRI->getRegSizeInBits(*RC)) {
+  switch (TRI->getSpillSize(*RC)) {
   default:
+    LLVM_DEBUG(
+        dbgs() << "Cannot determine appropriate opcode for load/store to/from "
+               << TRI->getName(Reg) << " of class " << TRI->getRegClassName(RC)
+               << " with spill size " << TRI->getSpillSize(*RC) << '\n');
     llvm_unreachable("Unknown spill size");
-  case 8:
+  case 2:
+    if (M68k::XR16RegClass.hasSubClassEq(RC))
+      return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
     if (M68k::DR8RegClass.hasSubClassEq(RC))
-      return load ? M68k::MOV8dp : M68k::MOV8pd;
+      return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
     if (M68k::CCRCRegClass.hasSubClassEq(RC))
-      return load ? M68k::MOV16cp : M68k::MOV16pc;
-
-    llvm_unreachable("Unknown 1-byte regclass");
-  case 16:
-    assert(M68k::XR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
-    return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
-  case 32:
-    assert(M68k::XR32RegClass.hasSubClassEq(RC) && "Unknown 4-byte regclass");
-    return load ? M68k::MOVM32mp_P : M68k::MOVM32pm_P;
+      return load ? M68k::MOVM16mp_P : M68k::MOVM16pm_P;
+    llvm_unreachable("Unknown 2-byte regclass");
+  case 4:
+    if (M68k::XR32RegClass.hasSubClassEq(RC))
+      return load ? M68k::MOVM32mp_P : M68k::MOVM32pm_P;
+    llvm_unreachable("Unknown 4-byte regclass");
   }
 }
 
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.td b/llvm/lib/Target/M68k/M68kRegisterInfo.td
index 45b492eba4ec0..4942636ffd529 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.td
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.td
@@ -99,52 +99,77 @@ class MxRegClass<list<ValueType> regTypes, int alignment, dag regList>
     : RegisterClass<"M68k", regTypes, alignment, regList>;
 
 // Data Registers
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<8,16,16>]> in
 def DR8  : MxRegClass<[i8],  16, (sequence "BD%u", 0, 7)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
 def DR16 : MxRegClass<[i16], 16, (sequence "WD%u", 0, 7)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def DR32 : MxRegClass<[i32], 32, (sequence "D%u", 0, 7)>;
 
 // Address Registers
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
 def AR16 : MxRegClass<[i16], 16, (add (sequence "WA%u", 0, 6), WSP)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def AR32 : MxRegClass<[i32], 32, (add (sequence "A%u", 0, 6), SP)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def AR32_NOSP : MxRegClass<[i32], 32, (sequence "A%u", 0, 6)>;
 
 // Index Register Classes
 // FIXME try alternative ordering like `D0, D1, A0, A1, ...`
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
 def XR16 : MxRegClass<[i16], 16, (add DR16, AR16)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def XR32 : MxRegClass<[i32], 32, (add DR32, AR32)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def SPC  : MxRegClass<[i32], 32, (add SP)>;
 
 // Floating Point Data Registers
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def FPDR32 : MxRegClass<[f32], 32, (sequence "FP%u", 0, 7)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<64,64,32>]> in
 def FPDR64 : MxRegClass<[f64], 32, (add FPDR32)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<80,128,32>]> in
 def FPDR80 : MxRegClass<[f80], 32, (add FPDR32)>;
 
 let CopyCost = -1 in {
+  let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<8,16,16>]> in
   def CCRC : MxRegClass<[i8],  16, (add CCR)>;
+  let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
   def SRC  : MxRegClass<[i16], 16, (add SR)>;
 
   // Float Point System Control Registers
-  def FPIC   : MxRegClass<[i32], 32, (add FPIAR)>;
-  def FPCSC  : MxRegClass<[i32], 32, (add FPC, FPS)>;
-  def FPSYSC : MxRegClass<[i32], 32, (add FPCSC, FPIC)>;
+  let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in {
+    def FPIC   : MxRegClass<[i32], 32, (add FPIAR)>;
+    def FPCSC  : MxRegClass<[i32], 32, (add FPC, FPS)>;
+    def FPSYSC : MxRegClass<[i32], 32, (add FPCSC, FPIC)>;
+  }
 }
 
 let isAllocatable = 0 in {
+  let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
   def PCC : MxRegClass<[i32], 32, (add PC)>;
 }
 
 // Register used with tail call
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
 def DR16_TC : MxRegClass<[i16], 16, (add D0, D1)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def DR32_TC : MxRegClass<[i32], 32, (add D0, D1)>;
 
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
 def AR16_TC : MxRegClass<[i16], 16, (add A0, A1)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def AR32_TC : MxRegClass<[i32], 32, (add A0, A1)>;
 
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<16,16,16>]> in
 def XR16_TC : MxRegClass<[i16], 16, (add DR16_TC, AR16_TC)>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in
 def XR32_TC : MxRegClass<[i32], 32, (add DR32_TC, AR32_TC)>;
 
 // These classes provide spill/restore order if used with MOVEM instruction
-def SPILL   : MxRegClass<[i32], 32, (add XR32)>;
-def SPILL_R : MxRegClass<[i32], 32, (add SP, (sequence "A%u", 6, 0), (sequence "D%u", 7, 0))>;
+let RegInfos = RegInfoByHwMode<[DefaultMode], [RegInfo<32,32,32>]> in {
+  def SPILL   : MxRegClass<[i32], 32, (add XR32)>;
+  def SPILL_R : MxRegClass<[i32], 32, (add SP, (sequence "A%u", 6, 0), (sequence "D%u", 7, 0))>;
+}
 
diff --git a/llvm/test/CodeGen/M68k/PR57660.ll b/llvm/test/CodeGen/M68k/PR57660.ll
index bad949b08cafa..359f0c2496356 100644
--- a/llvm/test/CodeGen/M68k/PR57660.ll
+++ b/llvm/test/CodeGen/M68k/PR57660.ll
@@ -8,10 +8,10 @@ define dso_local void @foo1() {
 ; CHECK-NEXT:    suba.l #2, %sp
 ; CHECK-NEXT:    .cfi_def_cfa_offset -6
 ; CHECK-NEXT:    moveq #0, %d0
-; CHECK-NEXT:    move.b %d0, (0,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    movem.w %d0, (0,%sp)
 ; CHECK-NEXT:  .LBB0_1: ; %do.body
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    move.b (0,%sp), %d0 ; 1-byte Folded Reload
+; CHECK-NEXT:    movem.w (0,%sp), %d0
 ; CHECK-NEXT:    cmpi.b #0, %d0
 ; CHECK-NEXT:    bne .LBB0_1
 ; CHECK-NEXT:  ; %bb.2: ; %do.end
@@ -39,24 +39,24 @@ define i32 @foo2(ptr noundef %0) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset -8
 ; CHECK-NEXT:    move.l (8,%sp), %a0
 ; CHECK-NEXT:    move.b (%a0), %d0
-; CHECK-NEXT:    move.b %d0, (0,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    movem.w %d0, (0,%sp)
 ; CHECK-NEXT:    and.b #1, %d0
-; CHECK-NEXT:    move.b %d0, (2,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    movem.w %d0, (2,%sp)
 ; CHECK-NEXT:    sub.b #1, %d0
 ; CHECK-NEXT:    bgt .LBB1_2
 ; CHECK-NEXT:  ; %bb.1: ; %if
-; CHECK-NEXT:    move.b (2,%sp), %d0 ; 1-byte Folded Reload
-; CHECK-NEXT:    move.b (0,%sp), %d1 ; 1-byte Folded Reload
+; CHECK-NEXT:    movem.w (2,%sp), %d0
+; CHECK-NEXT:    movem.w (0,%sp), %d1
 ; CHECK-NEXT:    add.b %d1, %d0
 ; CHECK-NEXT:    bra .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: ; %else
-; CHECK-NEXT:    move.b (2,%sp), %d1 ; 1-byte Folded Reload
-; CHECK-NEXT:    move.b (0,%sp), %d0 ; 1-byte Folded Reload
+; CHECK-NEXT:    movem.w (2,%sp), %d1
+; CHECK-NEXT:    movem.w (0,%sp), %d0
 ; CHECK-NEXT:    sub.b %d1, %d0
-; CHECK-NEXT:    move.b %d0, (0,%sp) ; 1-byte Folded Spill
+; CHECK-NEXT:    movem.w %d0, (0,%sp)
 ; CHECK-NEXT:  .LBB1_3: ; %cont
-; CHECK-NEXT:    move.b %d0, (2,%sp) ; 1-byte Folded Spill
-; CHECK-NEXT:    move.b (2,%sp), %d0 ; 1-byte Folded Reload
+; CHECK-NEXT:    movem.w %d0, (2,%sp)
+; CHECK-NEXT:    movem.w (2,%sp), %d0
 ; CHECK-NEXT:    ext.w %d0
 ; CHECK-NEXT:    ext.l %d0
 ; CHECK-NEXT:    adda.l #4, %sp
diff --git a/llvm/test/CodeGen/M68k/register-spills.ll b/llvm/test/CodeGen/M68k/register-spills.ll
new file mode 100644
index 0000000000000..9104a59f5d6b3
--- /dev/null
+++ b/llvm/test/CodeGen/M68k/register-spills.ll
@@ -0,0 +1,464 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=m68k -O0 %s -o - | FileCheck %s
+
+declare zeroext i1 @get1()
+declare i8 @get8()
+declare i16 @get16()
+declare i32 @get32()
+
+define void @test_edge_detection_conditional_branch() {
+; CHECK-LABEL: test_edge_detection_conditional_branch:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #12, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -16
+; CHECK-NEXT:    movem.l %d2, (8,%sp) ; 8-byte Folded Spill
+; CHECK-NEXT:    bra .LBB0_1
+; CHECK-NEXT:  .LBB0_1: ; %condition_check
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    jsr get1
+; CHECK-NEXT:    move.b (7,%sp), %d2
+; CHECK-NEXT:    and.b #1, %d2
+; CHECK-NEXT:    move.b %d0, %d1
+; CHECK-NEXT:    sub.b %d2, %d1
+; CHECK-NEXT:    movem.w %d0, (4,%sp)
+; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:    bra .LBB0_2
+; CHECK-NEXT:  .LBB0_2: ; %do_something
+; CHECK-NEXT:    movem.w (4,%sp), %d0
+; CHECK-NEXT:    move.b %d0, (7,%sp)
+; CHECK-NEXT:    movem.l (8,%sp), %d2 ; 8-byte Folded Reload
+; CHECK-NEXT:    adda.l #12, %sp
+; CHECK-NEXT:    rts
+start:
+  %prev_state = alloca [1 x i8], align 1
+  br label %condition_check
+
+condition_check:
+  %state = call zeroext i1 @get1()
+  %local_prev_state = load i8, ptr %prev_state, align 1
+  %local_prev_state_trunc = trunc i8 %local_prev_state to i1
+  %result = icmp ne i1 %state, %local_prev_state_trunc
+  br i1 %result, label %condition_check, label %do_something
+
+do_something:
+  %state_ext = zext i1 %state to i8
+  store i8 %state_ext, ptr %prev_state, align 1
+  ret void
+}
+
+define void @test_force_spill_8() {
+; CHECK-LABEL: test_force_spill_8:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #108, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -112
+; CHECK-NEXT:    movem.l %d2-%d7, (84,%sp) ; 28-byte Folded Spill
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (82,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (66,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d2
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d3
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d4
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d5
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d6
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d7
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (80,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (78,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (76,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (74,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (72,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (70,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (68,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w (66,%sp), %d1
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %sp, %a0
+; CHECK-NEXT:    move.l %d0, (60,%a0)
+; CHECK-NEXT:    movem.w (68,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (56,%a0)
+; CHECK-NEXT:    movem.w (70,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (52,%a0)
+; CHECK-NEXT:    movem.w (72,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (48,%a0)
+; CHECK-NEXT:    movem.w (74,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (44,%a0)
+; CHECK-NEXT:    movem.w (76,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (40,%a0)
+; CHECK-NEXT:    movem.w (78,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (36,%a0)
+; CHECK-NEXT:    movem.w (80,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (32,%a0)
+; CHECK-NEXT:    movem.w (82,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d7
+; CHECK-NEXT:    move.l %d7, (28,%a0)
+; CHECK-NEXT:    and.l #255, %d6
+; CHECK-NEXT:    move.l %d6, (24,%a0)
+; CHECK-NEXT:    and.l #255, %d5
+; CHECK-NEXT:    move.l %d5, (20,%a0)
+; CHECK-NEXT:    and.l #255, %d4
+; CHECK-NEXT:    move.l %d4, (16,%a0)
+; CHECK-NEXT:    and.l #255, %d3
+; CHECK-NEXT:    move.l %d3, (12,%a0)
+; CHECK-NEXT:    and.l #255, %d2
+; CHECK-NEXT:    move.l %d2, (8,%a0)
+; CHECK-NEXT:    and.l #255, %d1
+; CHECK-NEXT:    move.l %d1, (4,%a0)
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:    jsr test_force_spill_8_consumer
+; CHECK-NEXT:    movem.l (84,%sp), %d2-%d7 ; 28-byte Folded Reload
+; CHECK-NEXT:    adda.l #108, %sp
+; CHECK-NEXT:    rts
+ start:
+  %r0 = call i8 @get8()
+  %r1 = call i8 @get8()
+  %r2 = call i8 @get8()
+  %r3 = call i8 @get8()
+  %r4 = call i8 @get8()
+  %r5 = call i8 @get8()
+  %r6 = call i8 @get8()
+  %r7 = call i8 @get8()
+  %r8 = call i8 @get8()
+  %r9 = call i8 @get8()
+  %ra = call i8 @get8()
+  %rb = call i8 @get8()
+  %rc = call i8 @get8()
+  %rd = call i8 @get8()
+  %re = call i8 @get8()
+  %rf = call i8 @get8()
+  call void @test_force_spill_8_consumer(i8 %r0, i8 %r1, i8 %r2, i8 %r3, i8 %r4, i8 %r5, i8 %r6, i8 %r7, i8 %r8, i8 %r9, i8 %ra, i8 %rb, i8 %rc, i8 %rd, i8 %re, i8 %rf)
+  ret void
+}
+
+declare void @test_force_spill_8_consumer(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8)
+
+define void @test_force_spill_16() {
+; CHECK-LABEL: test_force_spill_16:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #116, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -120
+; CHECK-NEXT:    movem.l %d2-%d7/%a2-%a6, (72,%sp) ; 48-byte Folded Spill
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (70,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (66,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (64,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d2
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d3
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d4
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d5
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d6
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d7
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a2
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a3
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a4
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a5
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a6
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (68,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w (64,%sp), %a1
+; CHECK-NEXT:    movem.w (66,%sp), %d1
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    move.l %sp, %a0
+; CHECK-NEXT:    move.l %d0, (60,%a0)
+; CHECK-NEXT:    movem.w (68,%sp), %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    move.l %d0, (56,%a0)
+; CHECK-NEXT:    movem.w (70,%sp), %d0
+; CHECK-NEXT:    and.l #65535, %a6
+; CHECK-NEXT:    move.l %a6, (52,%a0)
+; CHECK-NEXT:    and.l #65535, %a5
+; CHECK-NEXT:    move.l %a5, (48,%a0)
+; CHECK-NEXT:    and.l #65535, %a4
+; CHECK-NEXT:    move.l %a4, (44,%a0)
+; CHECK-NEXT:    and.l #65535, %a3
+; CHECK-NEXT:    move.l %a3, (40,%a0)
+; CHECK-NEXT:    and.l #65535, %a2
+; CHECK-NEXT:    move.l %a2, (36,%a0)
+; CHECK-NEXT:    and.l #65535, %d7
+; CHECK-NEXT:    move.l %d7, (32,%a0)
+; CHECK-NEXT:    and.l #65535, %d6
+; CHECK-NEXT:    move.l %d6, (28,%a0)
+; CHECK-NEXT:    and.l #65535, %d5
+; CHECK-NEXT:    move.l %d5, (24,%a0)
+; CHECK-NEXT:    and.l #65535, %d4
+; CHECK-NEXT:    move.l %d4, (20,%a0)
+; CHECK-NEXT:    and.l #65535, %d3
+; CHECK-NEXT:    move.l %d3, (16,%a0)
+; CHECK-NEXT:    and.l #65535, %d2
+; CHECK-NEXT:    move.l %d2, (12,%a0)
+; CHECK-NEXT:    and.l #65535, %a1
+; CHECK-NEXT:    move.l %a1, (8,%a0)
+; CHECK-NEXT:    and.l #65535, %d1
+; CHECK-NEXT:    move.l %d1, (4,%a0)
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:    jsr test_force_spill_16_consumer
+; CHECK-NEXT:    movem.l (72,%sp), %d2-%d7/%a2-%a6 ; 48-byte Folded Reload
+; CHECK-NEXT:    adda.l #116, %sp
+; CHECK-NEXT:    rts
+ start:
+  %r0 = call i16 @get16()
+  %r1 = call i16 @get16()
+  %r2 = call i16 @get16()
+  %r3 = call i16 @get16()
+  %r4 = call i16 @get16()
+  %r5 = call i16 @get16()
+  %r6 = call i16 @get16()
+  %r7 = call i16 @get16()
+  %r8 = call i16 @get16()
+  %r9 = call i16 @get16()
+  %ra = call i16 @get16()
+  %rb = call i16 @get16()
+  %rc = call i16 @get16()
+  %rd = call i16 @get16()
+  %re = call i16 @get16()
+  %rf = call i16 @get16()
+  call void @test_force_spill_16_consumer(i16 %r0, i16 %r1, i16 %r2, i16 %r3, i16 %r4, i16 %r5, i16 %r6, i16 %r7, i16 %r8, i16 %r9, i16 %ra, i16 %rb, i16 %rc, i16 %rd, i16 %re, i16 %rf)
+  ret void
+}
+
+declare void @test_force_spill_16_consumer(i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16)
+
+define void @test_force_spill_32() {
+; CHECK-LABEL: test_force_spill_32:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #124, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -128
+; CHECK-NEXT:    movem.l %d2-%d7/%a2-%a6, (80,%sp) ; 48-byte Folded Spill
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l %d0, (76,%sp)
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l %d0, (68,%sp)
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l %d0, (64,%sp)
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d2
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d3
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d4
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d5
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d6
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d7
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a2
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a3
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a4
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a5
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a6
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l %d0, (72,%sp)
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l (64,%sp), %a1
+; CHECK-NEXT:    movem.l (68,%sp), %d1
+; CHECK-NEXT:    move.l %sp, %a0
+; CHECK-NEXT:    move.l %d0, (60,%a0)
+; CHECK-NEXT:    movem.l (72,%sp), %d0
+; CHECK-NEXT:    move.l %d0, (56,%a0)
+; CHECK-NEXT:    movem.l (76,%sp), %d0
+; CHECK-NEXT:    move.l %a6, (52,%a0)
+; CHECK-NEXT:    move.l %a5, (48,%a0)
+; CHECK-NEXT:    move.l %a4, (44,%a0)
+; CHECK-NEXT:    move.l %a3, (40,%a0)
+; CHECK-NEXT:    move.l %a2, (36,%a0)
+; CHECK-NEXT:    move.l %d7, (32,%a0)
+; CHECK-NEXT:    move.l %d6, (28,%a0)
+; CHECK-NEXT:    move.l %d5, (24,%a0)
+; CHECK-NEXT:    move.l %d4, (20,%a0)
+; CHECK-NEXT:    move.l %d3, (16,%a0)
+; CHECK-NEXT:    move.l %d2, (12,%a0)
+; CHECK-NEXT:    move.l %a1, (8,%a0)
+; CHECK-NEXT:    move.l %d1, (4,%a0)
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:    jsr test_force_spill_32_consumer
+; CHECK-NEXT:    movem.l (80,%sp), %d2-%d7/%a2-%a6 ; 48-byte Folded Reload
+; CHECK-NEXT:    adda.l #124, %sp
+; CHECK-NEXT:    rts
+ start:
+  %r0 = call i32 @get32()
+  %r1 = call i32 @get32()
+  %r2 = call i32 @get32()
+  %r3 = call i32 @get32()
+  %r4 = call i32 @get32()
+  %r5 = call i32 @get32()
+  %r6 = call i32 @get32()
+  %r7 = call i32 @get32()
+  %r8 = call i32 @get32()
+  %r9 = call i32 @get32()
+  %ra = call i32 @get32()
+  %rb = call i32 @get32()
+  %rc = call i32 @get32()
+  %rd = call i32 @get32()
+  %re = call i32 @get32()
+  %rf = call i32 @get32()
+  call void @test_force_spill_32_consumer(i32 %r0, i32 %r1, i32 %r2, i32 %r3, i32 %r4, i32 %r5, i32 %r6, i32 %r7, i32 %r8, i32 %r9, i32 %ra, i32 %rb, i32 %rc, i32 %rd, i32 %re, i32 %rf)
+  ret void
+}
+
+declare void @test_force_spill_32_consumer(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define void @test_force_spill_mixed() {
+; CHECK-LABEL: test_force_spill_mixed:
+; CHECK:       .cfi_startproc
+; CHECK-NEXT:  ; %bb.0: ; %start
+; CHECK-NEXT:    suba.l #148, %sp
+; CHECK-NEXT:    .cfi_def_cfa_offset -152
+; CHECK-NEXT:    movem.l %d2-%d7/%a2-%a6, (104,%sp) ; 48-byte Folded Spill
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (102,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (86,%sp)
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l %d0, (80,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d2
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d3
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d4
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %d5
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %d6
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    move.b %d0, %d7
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a2
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a3
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a4
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (100,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    move.w %d0, %a5
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    move.l %d0, %a6
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (98,%sp)
+; CHECK-NEXT:    jsr get8
+; CHECK-NEXT:    movem.w %d0, (96,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.w %d0, (94,%sp)
+; CHECK-NEXT:    jsr get32
+; CHECK-NEXT:    movem.l %d0, (88,%sp)
+; CHECK-NEXT:    jsr get16
+; CHECK-NEXT:    movem.l (80,%sp), %a1
+; CHECK-NEXT:    movem.w (86,%sp), %d1
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    move.l %sp, %a0
+; CHECK-NEXT:    move.l %d0, (76,%a0)
+; CHECK-NEXT:    movem.l (88,%sp), %d0
+; CHECK-NEXT:    move.l %d0, (72,%a0)
+; CHECK-NEXT:    movem.w (94,%sp), %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    move.l %d0, (68,%a0)
+; CHECK-NEXT:    movem.w (96,%sp), %d0
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (64,%a0)
+; CHECK-NEXT:    movem.w (98,%sp), %d0
+; CHECK-NEXT:    and.l #65535, %d0
+; CHECK-NEXT:    move.l %d0, (60,%a0)
+; CHECK-NEXT:    movem.w (100,%sp), %d0
+; CHECK-NEXT:    move.l %a6, (56,%a0)
+; CHECK-NEXT:    and.l #65535, %a5
+; CHECK-NEXT:    move.l %a5, (52,%a0)
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (48,%a0)
+; CHECK-NEXT:    movem.w (102,%sp), %d0
+; CHECK-NEXT:    and.l #65535, %a4
+; CHECK-NEXT:    move.l %a4, (44,%a0)
+; CHECK-NEXT:    move.l %a3, (40,%a0)
+; CHECK-NEXT:    and.l #65535, %a2
+; CHECK-NEXT:    move.l %a2, (36,%a0)
+; CHECK-NEXT:    and.l #255, %d7
+; CHECK-NEXT:    move.l %d7, (32,%a0)
+; CHECK-NEXT:    and.l #65535, %d6
+; CHECK-NEXT:    move.l %d6, (28,%a0)
+; CHECK-NEXT:    move.l %d5, (24,%a0)
+; CHECK-NEXT:    and.l #65535, %d4
+; CHECK-NEXT:    move.l %d4, (20,%a0)
+; CHECK-NEXT:    and.l #255, %d3
+; CHECK-NEXT:    move.l %d3, (16,%a0)
+; CHECK-NEXT:    and.l #65535, %d2
+; CHECK-NEXT:    move.l %d2, (12,%a0)
+; CHECK-NEXT:    move.l %a1, (8,%a0)
+; CHECK-NEXT:    and.l #65535, %d1
+; CHECK-NEXT:    move.l %d1, (4,%a0)
+; CHECK-NEXT:    and.l #255, %d0
+; CHECK-NEXT:    move.l %d0, (%a0)
+; CHECK-NEXT:    jsr test_force_spill_mixed_consumer
+; CHECK-NEXT:    movem.l (104,%sp), %d2-%d7/%a2-%a6 ; 48-byte Folded Reload
+; CHECK-NEXT:    adda.l #148, %sp
+; CHECK-NEXT:    rts
+ start:
+  %r0 = call i8 @get8()
+  %r1 = call i16 @get16()
+  %r2 = call i32 @get32()
+  %r3 = call i16 @get16()
+  %r4 = call i8 @get8()
+  %r5 = call i16 @get16()
+  %r6 = call i32 @get32()
+  %r7 = call i16 @get16()
+  %r8 = call i8 @get8()
+  %r9 = call i16 @get16()
+  %ra = call i32 @get32()
+  %rb = call i16 @get16()
+  %rc = call i8 @get8()
+  %rd = call i16 @get16()
+  %re = call i32 @get32()
+  %rf = call i16 @get16()
+  %rg = call i8 @get8()
+  %rh = call i16 @get16()
+  %ri = call i32 @get32()
+  %rj = call i16 @get16()
+  call void @test_force_spill_mixed_consumer(i8 %r0, i16 %r1, i32 %r2, i16 %r3, i8 %r4, i16 %r5, i32 %r6, i16 %r7, i8 %r8, i16 %r9, i32 %ra, i16 %rb, i8 %rc, i16 %rd, i32 %re, i16 %rf, i8 %rg, i16 %rh, i32 %ri, i16 %rj)
+  ret void
+}
+
+declare void @test_force_spill_mixed_consumer(i8, i16, i32, i16, i8, i16, i32, i16, i8, i16, i32, i16, i8, i16, i32, i16, i8, i16, i32, i16)