Reviewer summary (not part of the original commit message):

  AArch64: performCSELCombine gains a post-legalization fold
      CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
  It fires only when the SUBS flag result has a single use, the SUBS value
  result is unused, SUB(y, x) already exists in the DAG, SUB(x, y) does not,
  and y is not the constant 0.

  getSwappedCondition (new, AArch64BaseInfo.h) keeps EQ/NE, exchanges
  HS<->LS, LO<->HI, GE<->LE and LT<->GT, and returns AL for every other
  code; the combine treats AL as "no swapped form" and leaves the node alone.

  The test updates show the effect: a cmp/cmn + sub pair becomes a single
  subs, with the condition swapped for the ordered predicates.

  NOTE(review): this patch was rebuilt from a whitespace-flattened copy.
  Indentation and intra-line spacing are reconstructed and may not match the
  tree byte-for-byte; the template arguments of static_cast, which the
  flattened copy had dropped, were restored from the declared types.

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c096711bf3bd..e4715018d84ca 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25026,6 +25026,30 @@ static SDValue performCSELCombine(SDNode *N,
   if (SDValue Folded = foldCSELofCTTZ(N, DAG))
     return Folded;
 
+  // CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
+  // if SUB(y, x) already exists and we can produce a swapped predicate for cc.
+  SDValue Cond = N->getOperand(3);
+  if (DCI.isAfterLegalizeDAG() && Cond.getOpcode() == AArch64ISD::SUBS &&
+      Cond.hasOneUse() && Cond->hasNUsesOfValue(0, 0) &&
+      DAG.doesNodeExist(ISD::SUB, N->getVTList(),
+                        {Cond.getOperand(1), Cond.getOperand(0)}) &&
+      !DAG.doesNodeExist(ISD::SUB, N->getVTList(),
+                         {Cond.getOperand(0), Cond.getOperand(1)}) &&
+      !isNullConstant(Cond.getOperand(1))) {
+    AArch64CC::CondCode OldCond =
+        static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
+    AArch64CC::CondCode NewCond = getSwappedCondition(OldCond);
+    if (NewCond != AArch64CC::AL) {
+      SDLoc DL(N);
+      SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
+                                Cond.getOperand(1), Cond.getOperand(0));
+      return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
+                         N->getOperand(1),
+                         DAG.getConstant(NewCond, DL, MVT::i32),
+                         Sub.getValue(1));
+    }
+  }
+
   return performCONDCombine(N, DCI, DAG, 2, 3);
 }
 
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index b8d323649feaa..9671fa3b3d92f 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -306,6 +306,36 @@ inline static CondCode getInvertedCondCode(CondCode Code) {
   return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
 }
 
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static CondCode getSwappedCondition(CondCode CC) {
+  switch (CC) {
+  default:
+    return AL;
+  case EQ:
+    return EQ;
+  case NE:
+    return NE;
+  case HS:
+    return LS;
+  case LO:
+    return HI;
+  case HI:
+    return LO;
+  case LS:
+    return HS;
+  case GE:
+    return LE;
+  case LT:
+    return GT;
+  case GT:
+    return LT;
+  case LE:
+    return GE;
+  }
+}
+
 /// Given a condition code, return NZCV flags that would satisfy that condition.
 /// The flag bits are in the format expected by the ccmp instructions.
 /// Note that many different flag settings can satisfy a given condition code,
diff --git a/llvm/test/CodeGen/AArch64/adds_cmn.ll b/llvm/test/CodeGen/AArch64/adds_cmn.ll
index 674a3893653a1..7f1cb0df049b1 100644
--- a/llvm/test/CodeGen/AArch64/adds_cmn.ll
+++ b/llvm/test/CodeGen/AArch64/adds_cmn.ll
@@ -62,10 +62,8 @@ entry:
 define { i32, i32 } @subs_cmp_c(i32 noundef %x, i32 noundef %y) {
 ; CHECK-LABEL: subs_cmp_c:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: sub w1, w1, w0
-; CHECK-NEXT: cset w8, hs
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: subs w1, w1, w0
+; CHECK-NEXT: cset w0, ls
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
diff --git a/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll b/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
index 7c628cf1683d6..3971da27cdddc 100644
--- a/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
+++ b/llvm/test/CodeGen/AArch64/csel-subs-swapped.ll
@@ -5,8 +5,7 @@ define i32 @eq_i32(i32 %x) {
 ; CHECK-LABEL: eq_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT: sub w8, w8, w0
+; CHECK-NEXT: subs w8, w8, w0
 ; CHECK-NEXT: csel w0, w0, w8, eq
 ; CHECK-NEXT: ret
   %cmp = icmp eq i32 %x, -2097152
@@ -19,8 +18,7 @@ define i32 @ne_i32(i32 %x) {
 ; CHECK-LABEL: ne_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT: sub w8, w8, w0
+; CHECK-NEXT: subs w8, w8, w0
 ; CHECK-NEXT: csel w0, w0, w8, ne
 ; CHECK-NEXT: ret
   %cmp = icmp ne i32 %x, -2097152
@@ -33,9 +31,8 @@ define i32 @sgt_i32(i32 %x) {
 ; CHECK-LABEL: sgt_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: csel w0, w0, w8, gt
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, lt
 ; CHECK-NEXT: ret
   %cmp = icmp sgt i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -62,9 +59,8 @@ define i32 @slt_i32(i32 %x) {
 ; CHECK-LABEL: slt_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: csel w0, w0, w8, lt
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, gt
 ; CHECK-NEXT: ret
   %cmp = icmp slt i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -91,9 +87,8 @@ define i32 @ugt_i32(i32 %x) {
 ; CHECK-LABEL: ugt_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: csel w0, w0, w8, hi
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, lo
 ; CHECK-NEXT: ret
   %cmp = icmp ugt i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -120,9 +115,8 @@ define i32 @ult_i32(i32 %x) {
 ; CHECK-LABEL: ult_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
-; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
-; CHECK-NEXT: sub w8, w8, w0
-; CHECK-NEXT: csel w0, w0, w8, lo
+; CHECK-NEXT: subs w8, w8, w0
+; CHECK-NEXT: csel w0, w0, w8, hi
 ; CHECK-NEXT: ret
   %cmp = icmp ult i32 %x, -2097152
   %sub = sub i32 -2097152, %x
@@ -150,8 +144,7 @@ define i64 @eq_i64(i64 %x) {
 ; CHECK-LABEL: eq_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cmp x0, #100
-; CHECK-NEXT: sub x8, x8, x0
+; CHECK-NEXT: subs x8, x8, x0
 ; CHECK-NEXT: csel x0, x0, x8, eq
 ; CHECK-NEXT: ret
   %cmp = icmp eq i64 %x, 100
@@ -164,8 +157,7 @@ define i64 @ne_i64(i64 %x) {
 ; CHECK-LABEL: ne_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cmp x0, #100
-; CHECK-NEXT: sub x8, x8, x0
+; CHECK-NEXT: subs x8, x8, x0
 ; CHECK-NEXT: csel x0, x0, x8, ne
 ; CHECK-NEXT: ret
   %cmp = icmp ne i64 %x, 100
@@ -178,9 +170,8 @@ define i64 @sgt_i64(i64 %x) {
 ; CHECK-LABEL: sgt_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cmp x0, #100
-; CHECK-NEXT: sub x8, x8, x0
-; CHECK-NEXT: csel x0, x0, x8, gt
+; CHECK-NEXT: subs x8, x8, x0
+; CHECK-NEXT: csel x0, x0, x8, lt
 ; CHECK-NEXT: ret
   %cmp = icmp sgt i64 %x, 100
   %sub = sub i64 100, %x
@@ -206,9 +197,8 @@ define i64 @slt_i64(i64 %x) {
 ; CHECK-LABEL: slt_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cmp x0, #100
-; CHECK-NEXT: sub x8, x8, x0
-; CHECK-NEXT: csel x0, x0, x8, lt
+; CHECK-NEXT: subs x8, x8, x0
+; CHECK-NEXT: csel x0, x0, x8, gt
 ; CHECK-NEXT: ret
   %cmp = icmp slt i64 %x, 100
   %sub = sub i64 100, %x
@@ -234,9 +224,8 @@ define i64 @ugt_i64(i64 %x) {
 ; CHECK-LABEL: ugt_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cmp x0, #100
-; CHECK-NEXT: sub x8, x8, x0
-; CHECK-NEXT: csel x0, x0, x8, hi
+; CHECK-NEXT: subs x8, x8, x0
+; CHECK-NEXT: csel x0, x0, x8, lo
 ; CHECK-NEXT: ret
   %cmp = icmp ugt i64 %x, 100
   %sub = sub i64 100, %x
@@ -262,9 +251,8 @@ define i64 @ult_i64(i64 %x) {
 ; CHECK-LABEL: ult_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cmp x0, #100
-; CHECK-NEXT: sub x8, x8, x0
-; CHECK-NEXT: csel x0, x0, x8, lo
+; CHECK-NEXT: subs x8, x8, x0
+; CHECK-NEXT: csel x0, x0, x8, hi
 ; CHECK-NEXT: ret
   %cmp = icmp ult i64 %x, 100
   %sub = sub i64 100, %x