@@ -333,8 +333,6 @@ void MachineSMEABI::insertStateChanges() {
333
333
BlockInfo &Block = State.Blocks [MBB.getNumber ()];
334
334
ZAState InState =
335
335
State.BundleStates [Bundles->getBundle (MBB.getNumber (), /* Out=*/ false )];
336
- ZAState OutState =
337
- State.BundleStates [Bundles->getBundle (MBB.getNumber (), /* Out=*/ true )];
338
336
339
337
ZAState CurrentState = Block.FixedEntryState ;
340
338
if (CurrentState == ZAState::ANY)
@@ -350,6 +348,8 @@ void MachineSMEABI::insertStateChanges() {
350
348
if (MBB.succ_empty ())
351
349
continue ;
352
350
351
+ ZAState OutState =
352
+ State.BundleStates [Bundles->getBundle (MBB.getNumber (), /* Out=*/ true )];
353
353
if (CurrentState != OutState)
354
354
emitStateChange (MBB, MBB.getFirstTerminator (), CurrentState, OutState,
355
355
Block.PhysLiveRegsAtExit );
@@ -397,8 +397,7 @@ PhysRegSave MachineSMEABI::createPhysRegSave(LiveRegs PhysLiveRegs,
397
397
PhysRegSave RegSave{PhysLiveRegs};
398
398
if (PhysLiveRegs & LiveRegs::NZCV) {
399
399
RegSave.StatusFlags = MRI->createVirtualRegister (&AArch64::GPR64RegClass);
400
- BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS))
401
- .addReg (RegSave.StatusFlags , RegState::Define)
400
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS), RegSave.StatusFlags )
402
401
.addImm (AArch64SysReg::NZCV)
403
402
.addReg (AArch64::NZCV, RegState::Implicit);
404
403
}
@@ -445,8 +444,7 @@ void MachineSMEABI::emitRestoreLazySave(MachineBasicBlock &MBB,
445
444
.addImm (AArch64SVCR::SVCRZA)
446
445
.addImm (1 );
447
446
// Get current TPIDR2_EL0.
448
- BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS))
449
- .addReg (TPIDR2EL0, RegState::Define)
447
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::MRS), TPIDR2EL0)
450
448
.addImm (AArch64SysReg::TPIDR2_EL0);
451
449
// Get pointer to TPIDR2 block.
452
450
BuildMI (MBB, MBBI, DL, TII->get (AArch64::ADDXri), TPIDR2)
@@ -472,7 +470,6 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB,
472
470
bool ClearTPIDR2) {
473
471
DebugLoc DL = getDebugLoc (MBB, MBBI);
474
472
475
- // Clear TPIDR2.
476
473
if (ClearTPIDR2)
477
474
BuildMI (MBB, MBBI, DL, TII->get (AArch64::MSR))
478
475
.addImm (AArch64SysReg::TPIDR2_EL0)
@@ -536,9 +533,10 @@ void MachineSMEABI::emitAllocateLazySaveBuffer(
536
533
}
537
534
}
538
535
536
+ static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111 ;
539
537
static void emitZeroZA (const TargetInstrInfo &TII, DebugLoc DL,
540
538
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
541
- unsigned Mask) {
539
+ unsigned Mask = ZERO_ALL_ZA_MASK ) {
542
540
MachineInstrBuilder MIB =
543
541
BuildMI (MBB, MBBI, DL, TII.get (AArch64::ZERO_M)).addImm (Mask);
544
542
for (unsigned I = 0 ; I < 8 ; I++) {
@@ -569,9 +567,9 @@ void MachineSMEABI::emitNewZAPrologue(MachineBasicBlock &MBB,
569
567
BuildMI (MBB, MBBI, DL, TII->get (AArch64::MSRpstatesvcrImm1))
570
568
.addImm (AArch64SVCR::SVCRZA)
571
569
.addImm (1 );
572
- // Zero ZA. Note: ZA state may new be needed for new ZT0 functions .
570
+ // NOTE: Functions that only use ZT0 don't need to zero ZA.
573
571
if (MF->getInfo <AArch64FunctionInfo>()->getSMEFnAttrs ().hasZAState ())
574
- emitZeroZA (*TII, DL, MBB, MBBI, /* Mask= */ 0b11111111 );
572
+ emitZeroZA (*TII, DL, MBB, MBBI);
575
573
}
576
574
577
575
void MachineSMEABI::emitStateChange (MachineBasicBlock &MBB,
@@ -583,9 +581,14 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
583
581
if (From == ZAState::ANY || To == ZAState::ANY)
584
582
return ;
585
583
584
+ // If we're exiting from the CALLER_DORMANT state that means this new ZA
585
+ // function did not touch ZA (so ZA was never turned on).
586
+ if (From == ZAState::CALLER_DORMANT && To == ZAState::OFF)
587
+ return ;
588
+
586
589
// TODO: Avoid setting up the save buffer if there's no transition to
587
590
// LOCAL_SAVED.
588
- if (From == ZAState::CALLER_DORMANT && To != ZAState::OFF ) {
591
+ if (From == ZAState::CALLER_DORMANT) {
589
592
assert (MBB.getParent ()
590
593
->getInfo <AArch64FunctionInfo>()
591
594
->getSMEFnAttrs ()
@@ -598,7 +601,7 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
598
601
return ; // Nothing more to do (ZA is active after the prologue).
599
602
600
603
// Note: "emitNewZAPrologue" zeros ZA, so we may need to setup a lazy save
601
- // if "To" is "ZAState::LOCAL_SAVED". If may be possible to improve this
604
+ // if "To" is "ZAState::LOCAL_SAVED". It may be possible to improve this
602
605
// case by changing the placement of the zero instruction.
603
606
From = ZAState::ACTIVE;
604
607
}
@@ -608,10 +611,9 @@ void MachineSMEABI::emitStateChange(MachineBasicBlock &MBB,
608
611
else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE)
609
612
emitRestoreLazySave (MBB, InsertPt, PhysLiveRegs);
610
613
else if (To == ZAState::OFF) {
611
- // If we're exiting from the CALLER_DORMANT state that means this new ZA
612
- // function did not touch ZA (so ZA was never turned on).
613
- if (From != ZAState::CALLER_DORMANT)
614
- emitZAOff (MBB, InsertPt, /* ClearTPIDR2=*/ From == ZAState::LOCAL_SAVED);
614
+ assert (From != ZAState::CALLER_DORMANT &&
615
+ " CALLER_DORMANT to OFF should have already been handled" );
616
+ emitZAOff (MBB, InsertPt, /* ClearTPIDR2=*/ From == ZAState::LOCAL_SAVED);
615
617
} else {
616
618
dbgs () << " Error: Transition from " << getZAStateString (From) << " to "
617
619
<< getZAStateString (To) << ' \n ' ;
0 commit comments