Skip to content

Commit 6ec02f7

Browse files
[AArch64] Refactor redundant PTEST optimisations (NFC) (#87802)
This patch refactors `AArch64InstrInfo::optimizePTestInstr` to simplify the convoluted conditions and control flow and make it easier to add the optimisation in #81141
1 parent 18000fe commit 6ec02f7

File tree

2 files changed

+96
-78
lines changed

2 files changed

+96
-78
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+93-78
Original file line numberDiff line numberDiff line change
@@ -1355,48 +1355,52 @@ static bool areCFlagsAccessedBetweenInstrs(
13551355
return false;
13561356
}
13571357

1358-
/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1359-
/// operation which could set the flags in an identical manner
1360-
bool AArch64InstrInfo::optimizePTestInstr(
1361-
MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1362-
const MachineRegisterInfo *MRI) const {
1363-
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1364-
auto *Pred = MRI->getUniqueVRegDef(PredReg);
1365-
auto NewOp = Pred->getOpcode();
1366-
bool OpChanged = false;
1367-
1358+
std::optional<unsigned>
1359+
AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1360+
MachineInstr *Pred,
1361+
const MachineRegisterInfo *MRI) const {
13681362
unsigned MaskOpcode = Mask->getOpcode();
13691363
unsigned PredOpcode = Pred->getOpcode();
13701364
bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
13711365
bool PredIsWhileLike = isWhileOpcode(PredOpcode);
13721366

1373-
if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1374-
getElementSizeForOpcode(MaskOpcode) ==
1375-
getElementSizeForOpcode(PredOpcode) &&
1376-
Mask->getOperand(1).getImm() == 31) {
1367+
if (PredIsWhileLike) {
1368+
// For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1369+
// instruction and the condition is "any" since WHILEcc does an implicit
1370+
// PTEST(ALL, PG) check and PG is always a subset of ALL.
1371+
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1372+
return PredOpcode;
1373+
13771374
// For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
13781375
// redundant since WHILE performs an implicit PTEST with an all active
1379-
// mask. Must be an all active predicate of matching element size.
1376+
// mask.
1377+
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1378+
getElementSizeForOpcode(MaskOpcode) ==
1379+
getElementSizeForOpcode(PredOpcode))
1380+
return PredOpcode;
1381+
1382+
return {};
1383+
}
1384+
1385+
if (PredIsPTestLike) {
1386+
// For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1387+
// instruction that sets the flags as PTEST would and the condition is
1388+
// "any" since PG is always a subset of the governing predicate of the
1389+
// ptest-like instruction.
1390+
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1391+
return PredOpcode;
13801392

13811393
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1382-
// PTEST_LIKE instruction uses the same all active mask and the element
1383-
// size matches. If the PTEST has a condition of any then it is always
1384-
// redundant.
1385-
if (PredIsPTestLike) {
1394+
// the element size matches and either the PTEST_LIKE instruction uses
1395+
// the same all active mask or the condition is "any".
1396+
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1397+
getElementSizeForOpcode(MaskOpcode) ==
1398+
getElementSizeForOpcode(PredOpcode)) {
13861399
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1387-
if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1388-
return false;
1400+
if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1401+
return PredOpcode;
13891402
}
13901403

1391-
// Fallthrough to simply remove the PTEST.
1392-
} else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1393-
PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1394-
// For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1395-
// instruction that sets the flags as PTEST would. This is only valid when
1396-
// the condition is any.
1397-
1398-
// Fallthrough to simply remove the PTEST.
1399-
} else if (PredIsPTestLike) {
14001404
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
14011405
// flags are set based on the same mask 'PG', but PTEST_LIKE must operate
14021406
// on 8-bit predicates like the PTEST. Otherwise, for instructions like
@@ -1421,55 +1425,66 @@ bool AArch64InstrInfo::optimizePTestInstr(
14211425
// identical regardless of element size.
14221426
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
14231427
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1424-
if ((Mask != PTestLikeMask) ||
1425-
(PredElementSize != AArch64::ElementSizeB &&
1426-
PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1427-
return false;
1428+
if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1429+
PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1430+
return PredOpcode;
14281431

1429-
// Fallthrough to simply remove the PTEST.
1430-
} else {
1431-
// If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1432-
// opcode so the PTEST becomes redundant.
1433-
switch (PredOpcode) {
1434-
case AArch64::AND_PPzPP:
1435-
case AArch64::BIC_PPzPP:
1436-
case AArch64::EOR_PPzPP:
1437-
case AArch64::NAND_PPzPP:
1438-
case AArch64::NOR_PPzPP:
1439-
case AArch64::ORN_PPzPP:
1440-
case AArch64::ORR_PPzPP:
1441-
case AArch64::BRKA_PPzP:
1442-
case AArch64::BRKPA_PPzPP:
1443-
case AArch64::BRKB_PPzP:
1444-
case AArch64::BRKPB_PPzPP:
1445-
case AArch64::RDFFR_PPz: {
1446-
// Check to see if our mask is the same. If not the resulting flag bits
1447-
// may be different and we can't remove the ptest.
1448-
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1449-
if (Mask != PredMask)
1450-
return false;
1451-
break;
1452-
}
1453-
case AArch64::BRKN_PPzP: {
1454-
// BRKN uses an all active implicit mask to set flags unlike the other
1455-
// flag-setting instructions.
1456-
// PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1457-
if ((MaskOpcode != AArch64::PTRUE_B) ||
1458-
(Mask->getOperand(1).getImm() != 31))
1459-
return false;
1460-
break;
1461-
}
1462-
case AArch64::PTRUE_B:
1463-
// PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1464-
break;
1465-
default:
1466-
// Bail out if we don't recognize the input
1467-
return false;
1468-
}
1432+
return {};
1433+
}
14691434

1470-
NewOp = convertToFlagSettingOpc(PredOpcode);
1471-
OpChanged = true;
1435+
// If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1436+
// opcode so the PTEST becomes redundant.
1437+
switch (PredOpcode) {
1438+
case AArch64::AND_PPzPP:
1439+
case AArch64::BIC_PPzPP:
1440+
case AArch64::EOR_PPzPP:
1441+
case AArch64::NAND_PPzPP:
1442+
case AArch64::NOR_PPzPP:
1443+
case AArch64::ORN_PPzPP:
1444+
case AArch64::ORR_PPzPP:
1445+
case AArch64::BRKA_PPzP:
1446+
case AArch64::BRKPA_PPzPP:
1447+
case AArch64::BRKB_PPzP:
1448+
case AArch64::BRKPB_PPzPP:
1449+
case AArch64::RDFFR_PPz: {
1450+
// Check to see if our mask is the same. If not the resulting flag bits
1451+
// may be different and we can't remove the ptest.
1452+
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1453+
if (Mask != PredMask)
1454+
return {};
1455+
break;
14721456
}
1457+
case AArch64::BRKN_PPzP: {
1458+
// BRKN uses an all active implicit mask to set flags unlike the other
1459+
// flag-setting instructions.
1460+
// PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1461+
if ((MaskOpcode != AArch64::PTRUE_B) ||
1462+
(Mask->getOperand(1).getImm() != 31))
1463+
return {};
1464+
break;
1465+
}
1466+
case AArch64::PTRUE_B:
1467+
// PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1468+
break;
1469+
default:
1470+
// Bail out if we don't recognize the input
1471+
return {};
1472+
}
1473+
1474+
return convertToFlagSettingOpc(PredOpcode);
1475+
}
1476+
1477+
/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1478+
/// operation which could set the flags in an identical manner
1479+
bool AArch64InstrInfo::optimizePTestInstr(
1480+
MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1481+
const MachineRegisterInfo *MRI) const {
1482+
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1483+
auto *Pred = MRI->getUniqueVRegDef(PredReg);
1484+
unsigned PredOpcode = Pred->getOpcode();
1485+
auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1486+
if (!NewOp)
1487+
return false;
14731488

14741489
const TargetRegisterInfo *TRI = &getRegisterInfo();
14751490

@@ -1482,9 +1497,9 @@ bool AArch64InstrInfo::optimizePTestInstr(
14821497
// as they are prior to PTEST. Sometimes this requires the tested PTEST
14831498
// operand to be replaced with an equivalent instruction that also sets the
14841499
// flags.
1485-
Pred->setDesc(get(NewOp));
14861500
PTest->eraseFromParent();
1487-
if (OpChanged) {
1501+
if (*NewOp != PredOpcode) {
1502+
Pred->setDesc(get(*NewOp));
14881503
bool succeeded = UpdateOperandRegClass(*Pred);
14891504
(void)succeeded;
14901505
assert(succeeded && "Operands have incompatible register classes!");

llvm/lib/Target/AArch64/AArch64InstrInfo.h

+3
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
572572
bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
573573
unsigned PredReg,
574574
const MachineRegisterInfo *MRI) const;
575+
std::optional<unsigned>
576+
canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
577+
MachineInstr *Pred, const MachineRegisterInfo *MRI) const;
575578
};
576579

577580
struct UsedNZCV {

0 commit comments

Comments
 (0)