Skip to content

Commit 3990749

Browse files
[AArch64] Optimise test of the LSB of a paired whileCC instruction
Change-Id: Iefc0eb7e4b90715ae08c154dde5bda1091f9de07
1 parent 07e231b commit 3990749

File tree

7 files changed

+109
-62
lines changed

7 files changed

+109
-62
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
27272727
MAKE_CASE(AArch64ISD::INSR)
27282728
MAKE_CASE(AArch64ISD::PTEST)
27292729
MAKE_CASE(AArch64ISD::PTEST_ANY)
2730+
MAKE_CASE(AArch64ISD::PTEST_FIRST)
27302731
MAKE_CASE(AArch64ISD::PTRUE)
27312732
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
27322733
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
@@ -18733,21 +18734,41 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
1873318734
AArch64CC::CondCode Cond);
1873418735

1873518736
static bool isPredicateCCSettingOp(SDValue N) {
18736-
if ((N.getOpcode() == ISD::SETCC) ||
18737-
(N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
18738-
(N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
18739-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
18740-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
18741-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
18742-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
18743-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
18744-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
18745-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
18746-
// get_active_lane_mask is lowered to a whilelo instruction.
18747-
N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
18737+
if (N.getOpcode() == ISD::SETCC)
1874818738
return true;
1874918739

18750-
return false;
18740+
if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18741+
isNullConstant(N.getOperand(1)))
18742+
N = N.getOperand(0);
18743+
18744+
if (N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
18745+
return false;
18746+
18747+
switch (N.getConstantOperandVal(0)) {
18748+
default:
18749+
return false;
18750+
case Intrinsic::aarch64_sve_whilege_x2:
18751+
case Intrinsic::aarch64_sve_whilegt_x2:
18752+
case Intrinsic::aarch64_sve_whilehi_x2:
18753+
case Intrinsic::aarch64_sve_whilehs_x2:
18754+
case Intrinsic::aarch64_sve_whilele_x2:
18755+
case Intrinsic::aarch64_sve_whilelo_x2:
18756+
case Intrinsic::aarch64_sve_whilels_x2:
18757+
case Intrinsic::aarch64_sve_whilelt_x2:
18758+
if (N.getResNo() != 0)
18759+
return false;
18760+
[[fallthrough]];
18761+
case Intrinsic::aarch64_sve_whilege:
18762+
case Intrinsic::aarch64_sve_whilegt:
18763+
case Intrinsic::aarch64_sve_whilehi:
18764+
case Intrinsic::aarch64_sve_whilehs:
18765+
case Intrinsic::aarch64_sve_whilele:
18766+
case Intrinsic::aarch64_sve_whilelo:
18767+
case Intrinsic::aarch64_sve_whilels:
18768+
case Intrinsic::aarch64_sve_whilelt:
18769+
case Intrinsic::get_active_lane_mask:
18770+
return true;
18771+
}
1875118772
}
1875218773

1875318774
// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
@@ -20666,9 +20687,19 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
2066620687
}
2066720688

2066820689
// Set condition code (CC) flags.
20669-
SDValue Test = DAG.getNode(
20670-
Cond == AArch64CC::ANY_ACTIVE ? AArch64ISD::PTEST_ANY : AArch64ISD::PTEST,
20671-
DL, MVT::Other, Pg, Op);
20690+
AArch64ISD::NodeType NT;
20691+
switch (Cond) {
20692+
default:
20693+
NT = AArch64ISD::PTEST;
20694+
break;
20695+
case AArch64CC::ANY_ACTIVE:
20696+
NT = AArch64ISD::PTEST_ANY;
20697+
break;
20698+
case AArch64CC::FIRST_ACTIVE:
20699+
NT = AArch64ISD::PTEST_FIRST;
20700+
break;
20701+
}
20702+
SDValue Test = DAG.getNode(NT, DL, MVT::Other, Pg, Op);
2067220703

2067320704
// Convert CC to integer based on requested condition.
2067420705
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ enum NodeType : unsigned {
359359
INSR,
360360
PTEST,
361361
PTEST_ANY,
362+
PTEST_FIRST,
362363
PTRUE,
363364

364365
CTTZ_ELTS,

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1184,6 +1184,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
11841184
break;
11851185
case AArch64::PTEST_PP:
11861186
case AArch64::PTEST_PP_ANY:
1187+
case AArch64::PTEST_PP_FIRST:
11871188
SrcReg = MI.getOperand(0).getReg();
11881189
SrcReg2 = MI.getOperand(1).getReg();
11891190
// Not sure about the mask and value for now...
@@ -1355,12 +1356,25 @@ static bool areCFlagsAccessedBetweenInstrs(
13551356
return false;
13561357
}
13571358

1358-
std::optional<unsigned>
1359+
std::optional<std::pair<unsigned, MachineInstr *>>
13591360
AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
13601361
MachineInstr *Pred,
13611362
const MachineRegisterInfo *MRI) const {
13621363
unsigned MaskOpcode = Mask->getOpcode();
13631364
unsigned PredOpcode = Pred->getOpcode();
1365+
1366+
// Handle a COPY from the LSB of the results of a paired WHILEcc instruction.
1367+
if ((PredOpcode == TargetOpcode::COPY &&
1368+
Pred->getOperand(1).getSubReg() == AArch64::psub0) ||
1369+
// Handle unpack of the LSB of the result of a WHILEcc instruction.
1370+
PredOpcode == AArch64::PUNPKLO_PP) {
1371+
MachineInstr *MI = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1372+
if (MI && isWhileOpcode(MI->getOpcode())) {
1373+
Pred = MI;
1374+
PredOpcode = MI->getOpcode();
1375+
}
1376+
}
1377+
13641378
bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
13651379
bool PredIsWhileLike = isWhileOpcode(PredOpcode);
13661380

@@ -1369,15 +1383,16 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
13691383
// instruction and the condition is "any" since WHILEcc does an implicit
13701384
// PTEST(ALL, PG) check and PG is always a subset of ALL.
13711385
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1372-
return PredOpcode;
1386+
return std::make_pair(PredOpcode, Pred);
13731387

1374-
// For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1375-
// redundant since WHILE performs an implicit PTEST with an all active
1376-
// mask.
1388+
// For PTEST(PTRUE_ALL, WHILE), since WHILE performs an implicit PTEST
1389+
// with an all active mask, the PTEST is redundant if either the element
1390+
// size matches or the PTEST condition is "first".
13771391
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1378-
getElementSizeForOpcode(MaskOpcode) ==
1379-
getElementSizeForOpcode(PredOpcode))
1380-
return PredOpcode;
1392+
(PTest->getOpcode() == AArch64::PTEST_PP_FIRST ||
1393+
getElementSizeForOpcode(MaskOpcode) ==
1394+
getElementSizeForOpcode(PredOpcode)))
1395+
return std::make_pair(PredOpcode, Pred);
13811396

13821397
return {};
13831398
}
@@ -1388,7 +1403,7 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
13881403
// "any" since PG is always a subset of the governing predicate of the
13891404
// ptest-like instruction.
13901405
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1391-
return PredOpcode;
1406+
return std::make_pair(PredOpcode, Pred);
13921407

13931408
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
13941409
// element size matches and either the PTEST_LIKE instruction uses
@@ -1398,7 +1413,7 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
13981413
getElementSizeForOpcode(PredOpcode)) {
13991414
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
14001415
if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1401-
return PredOpcode;
1416+
return std::make_pair(PredOpcode, Pred);
14021417
}
14031418

14041419
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
@@ -1427,7 +1442,7 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
14271442
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
14281443
if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
14291444
PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1430-
return PredOpcode;
1445+
return std::make_pair(PredOpcode, Pred);
14311446

14321447
return {};
14331448
}
@@ -1471,7 +1486,7 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
14711486
return {};
14721487
}
14731488

1474-
return convertToFlagSettingOpc(PredOpcode);
1489+
return std::make_pair(convertToFlagSettingOpc(PredOpcode), Pred);
14751490
}
14761491

14771492
/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
@@ -1481,10 +1496,12 @@ bool AArch64InstrInfo::optimizePTestInstr(
14811496
const MachineRegisterInfo *MRI) const {
14821497
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
14831498
auto *Pred = MRI->getUniqueVRegDef(PredReg);
1499+
unsigned NewOp;
14841500
unsigned PredOpcode = Pred->getOpcode();
1485-
auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1486-
if (!NewOp)
1501+
auto canRemove = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1502+
if (!canRemove)
14871503
return false;
1504+
std::tie(NewOp, Pred) = *canRemove;
14881505

14891506
const TargetRegisterInfo *TRI = &getRegisterInfo();
14901507

@@ -1498,8 +1515,8 @@ bool AArch64InstrInfo::optimizePTestInstr(
14981515
// operand to be replaced with an equivalent instruction that also sets the
14991516
// flags.
15001517
PTest->eraseFromParent();
1501-
if (*NewOp != PredOpcode) {
1502-
Pred->setDesc(get(*NewOp));
1518+
if (NewOp != PredOpcode) {
1519+
Pred->setDesc(get(NewOp));
15031520
bool succeeded = UpdateOperandRegClass(*Pred);
15041521
(void)succeeded;
15051522
assert(succeeded && "Operands have incompatible register classes!");
@@ -1560,7 +1577,8 @@ bool AArch64InstrInfo::optimizeCompareInstr(
15601577
}
15611578

15621579
if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1563-
CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1580+
CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1581+
CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
15641582
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
15651583

15661584
if (SrcReg2 != 0)

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
572572
bool optimizePTestInstr(MachineInstr *PTest, unsigned MaskReg,
573573
unsigned PredReg,
574574
const MachineRegisterInfo *MRI) const;
575-
std::optional<unsigned>
575+
576+
std::optional<std::pair<unsigned, MachineInstr *>>
576577
canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
577578
MachineInstr *Pred, const MachineRegisterInfo *MRI) const;
578579
};

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,10 @@ def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
373373
(AArch64fadda_p_node (SVEAllActive), node:$op2,
374374
(vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>;
375375

376-
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
377-
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
378-
def AArch64ptest_any : SDNode<"AArch64ISD::PTEST_ANY", SDT_AArch64PTest>;
376+
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
377+
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
378+
def AArch64ptest_any : SDNode<"AArch64ISD::PTEST_ANY", SDT_AArch64PTest>;
379+
def AArch64ptest_first : SDNode<"AArch64ISD::PTEST_FIRST", SDT_AArch64PTest>;
379380

380381
def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3,
381382
[SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0, 1>]>;
@@ -948,7 +949,7 @@ let Predicates = [HasSVEorSME] in {
948949
defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>;
949950
defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
950951

951-
defm PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest, AArch64ptest_any>;
952+
defm PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest, AArch64ptest_any, AArch64ptest_first>;
952953
defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
953954
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
954955
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -784,13 +784,16 @@ class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
784784
}
785785

786786
multiclass sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op,
787-
SDPatternOperator op_any> {
787+
SDPatternOperator op_any, SDPatternOperator op_first> {
788788
def NAME : sve_int_ptest<opc, asm, op>;
789789

790790
let hasNoSchedulingInfo = 1, isCompare = 1, Defs = [NZCV] in {
791791
def _ANY : Pseudo<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
792792
[(op_any (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>,
793793
PseudoInstExpansion<(!cast<Instruction>(NAME) PPRAny:$Pg, PPR8:$Pn)>;
794+
def _FIRST : Pseudo<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
795+
[(op_first (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>,
796+
PseudoInstExpansion<(!cast<Instruction>(NAME) PPRAny:$Pg, PPR8:$Pn)>;
794797
}
795798
}
796799

@@ -9669,7 +9672,7 @@ multiclass sve2p1_int_while_rr_pn<string mnemonic, bits<3> opc> {
96699672

96709673
// SVE integer compare scalar count and limit (predicate pair)
96719674
class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
9672-
RegisterOperand ppr_ty>
9675+
RegisterOperand ppr_ty, ElementSizeEnum EltSz>
96739676
: I<(outs ppr_ty:$Pd), (ins GPR64:$Rn, GPR64:$Rm),
96749677
mnemonic, "\t$Pd, $Rn, $Rm",
96759678
"", []>, Sched<[]> {
@@ -9687,16 +9690,18 @@ class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
96879690
let Inst{3-1} = Pd;
96889691
let Inst{0} = opc{0};
96899692

9693+
let ElementSize = EltSz;
96909694
let Defs = [NZCV];
96919695
let hasSideEffects = 0;
9696+
let isWhile = 1;
96929697
}
96939698

96949699

96959700
multiclass sve2p1_int_while_rr_pair<string mnemonic, bits<3> opc> {
9696-
def _B : sve2p1_int_while_rr_pair<mnemonic, 0b00, opc, PP_b_mul_r>;
9697-
def _H : sve2p1_int_while_rr_pair<mnemonic, 0b01, opc, PP_h_mul_r>;
9698-
def _S : sve2p1_int_while_rr_pair<mnemonic, 0b10, opc, PP_s_mul_r>;
9699-
def _D : sve2p1_int_while_rr_pair<mnemonic, 0b11, opc, PP_d_mul_r>;
9701+
def _B : sve2p1_int_while_rr_pair<mnemonic, 0b00, opc, PP_b_mul_r, ElementSizeB>;
9702+
def _H : sve2p1_int_while_rr_pair<mnemonic, 0b01, opc, PP_h_mul_r, ElementSizeH>;
9703+
def _S : sve2p1_int_while_rr_pair<mnemonic, 0b10, opc, PP_s_mul_r, ElementSizeS>;
9704+
def _D : sve2p1_int_while_rr_pair<mnemonic, 0b11, opc, PP_d_mul_r, ElementSizeD>;
97009705
}
97019706

97029707

llvm/test/CodeGen/AArch64/opt-while-test.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ define void @f_while(i32 %i, i32 %n) #0 {
88
; CHECK: // %bb.0: // %E
99
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
1010
; CHECK-NEXT: whilelo p0.b, w0, w1
11-
; CHECK-NEXT: punpklo p0.h, p0.b
12-
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
13-
; CHECK-NEXT: fmov w8, s0
14-
; CHECK-NEXT: tbz w8, #0, .LBB0_2
11+
; CHECK-NEXT: b.pl .LBB0_2
1512
; CHECK-NEXT: // %bb.1: // %A
1613
; CHECK-NEXT: bl g0
1714
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -25,10 +22,7 @@ define void @f_while(i32 %i, i32 %n) #0 {
2522
; CHECK-SVE2p1: // %bb.0: // %E
2623
; CHECK-SVE2p1-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
2724
; CHECK-SVE2p1-NEXT: whilelo p0.b, w0, w1
28-
; CHECK-SVE2p1-NEXT: punpklo p0.h, p0.b
29-
; CHECK-SVE2p1-NEXT: mov z0.h, p0/z, #1 // =0x1
30-
; CHECK-SVE2p1-NEXT: fmov w8, s0
31-
; CHECK-SVE2p1-NEXT: tbz w8, #0, .LBB0_2
25+
; CHECK-SVE2p1-NEXT: b.pl .LBB0_2
3226
; CHECK-SVE2p1-NEXT: // %bb.1: // %A
3327
; CHECK-SVE2p1-NEXT: bl g0
3428
; CHECK-SVE2p1-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -54,12 +48,9 @@ define void @f_while_x2(i32 %i, i32 %n) #0 {
5448
; CHECK-LABEL: f_while_x2:
5549
; CHECK: // %bb.0: // %E
5650
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
57-
; CHECK-NEXT: whilelo p1.b, w0, w1
58-
; CHECK-NEXT: punpkhi p0.h, p1.b
59-
; CHECK-NEXT: punpklo p1.h, p1.b
60-
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
61-
; CHECK-NEXT: fmov w8, s0
62-
; CHECK-NEXT: tbz w8, #0, .LBB1_2
51+
; CHECK-NEXT: whilelo p0.b, w0, w1
52+
; CHECK-NEXT: punpkhi p0.h, p0.b
53+
; CHECK-NEXT: b.pl .LBB1_2
6354
; CHECK-NEXT: // %bb.1: // %A
6455
; CHECK-NEXT: bl g0
6556
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -75,15 +66,14 @@ define void @f_while_x2(i32 %i, i32 %n) #0 {
7566
; CHECK-SVE2p1-NEXT: mov w8, w1
7667
; CHECK-SVE2p1-NEXT: mov w9, w0
7768
; CHECK-SVE2p1-NEXT: whilelo { p0.h, p1.h }, x9, x8
78-
; CHECK-SVE2p1-NEXT: mov z0.h, p0/z, #1 // =0x1
79-
; CHECK-SVE2p1-NEXT: mov p0.b, p1.b
80-
; CHECK-SVE2p1-NEXT: fmov w8, s0
81-
; CHECK-SVE2p1-NEXT: tbz w8, #0, .LBB1_2
69+
; CHECK-SVE2p1-NEXT: b.pl .LBB1_2
8270
; CHECK-SVE2p1-NEXT: // %bb.1: // %A
71+
; CHECK-SVE2p1-NEXT: mov p0.b, p1.b
8372
; CHECK-SVE2p1-NEXT: bl g0
8473
; CHECK-SVE2p1-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
8574
; CHECK-SVE2p1-NEXT: ret
8675
; CHECK-SVE2p1-NEXT: .LBB1_2: // %B
76+
; CHECK-SVE2p1-NEXT: mov p0.b, p1.b
8777
; CHECK-SVE2p1-NEXT: bl g1
8878
; CHECK-SVE2p1-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
8979
; CHECK-SVE2p1-NEXT: ret

0 commit comments

Comments
 (0)