From 8f13cfbbd2031de67255d2b7638e61c033d5b53b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Thu, 11 Jan 2024 14:17:18 +0100 Subject: [PATCH 1/4] [GlobalIsel] Combine logic of icmps Inspired by InstCombinerImpl::foldAndOrOfICmpsUsingRanges with some adaptions to MIR. --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 12 + .../CodeGen/GlobalISel/GenericMachineInstrs.h | 128 +++++++++++ .../include/llvm/Target/GlobalISel/Combine.td | 14 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 176 ++++++++++++++ .../GlobalISel/combine-logic-of-compare.mir | 214 ++++++++++++++++++ llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 46 +--- .../CodeGen/AMDGPU/llvm.is.fpclass.f16.ll | 104 ++++----- 7 files changed, 595 insertions(+), 99 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index a6e9406bed06a..3a0a7bea90578 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -814,6 +814,12 @@ class CombinerHelper { /// Combine selects. bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Combine ands, + bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Combine ors, + bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; @@ -919,6 +925,12 @@ class CombinerHelper { bool AllowUndefs); std::optional getConstantOrConstantSplatVector(Register Src); + + /// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2) + /// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2) + /// into a single comparison using range-based reasoning. 
+ bool tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, + BuildFnTy &MatchInfo); }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 14885d5f9d08e..c69769cf4ee35 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -577,6 +577,134 @@ class GPhi : public GenericMachineInstr { } }; +/// Represents a binary operation, i.e, x = y op z. +class GBinOp : public GenericMachineInstr { +public: + Register getLHSReg() const { return getReg(1); } + Register getRHSReg() const { return getReg(2); } + + static bool classof(const MachineInstr *MI) { + switch (MI->getOpcode()) { + // Integer. + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: + // Floating point. + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FPOW: + // Logical. + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + return true; + default: + return false; + } + }; +}; + +/// Represents an integer binary operation. 
+class GIntBinOp : public GBinOp { +public: + static bool classof(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: + return true; + default: + return false; + } + }; +}; + +/// Represents a floating point binary operation. +class GFBinOp : public GBinOp { +public: + static bool classof(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FPOW: + return true; + default: + return false; + } + }; +}; + +/// Represents a logical binary operation. +class GLogicalBinOp : public GBinOp { +public: + static bool classof(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + return true; + default: + return false; + } + }; +}; + +/// Represents an integer addition. +class GAdd : public GIntBinOp { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_ADD; + }; +}; + +/// Represents a logical and. +class GAnd : public GLogicalBinOp { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_AND; + }; +}; + +/// Represents a logical or. 
+class GOr : public GLogicalBinOp { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_OR; + }; +}; + } // namespace llvm #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 6bda80681432a..9b0e1b0d7c4f9 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1241,6 +1241,18 @@ def match_selects : GICombineRule< [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +def match_ands : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_AND):$root, + [{ return Helper.matchAnd(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + +def match_ors : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_OR):$root, + [{ return Helper.matchOr(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -1314,7 +1326,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, sub_add_reg, select_to_minmax, redundant_binop_in_equality, - fsub_to_fneg, commute_constant_to_rhs]>; + fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors]>; // A combine group used to for prelegalizer combiners at -O0. 
The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index fc2793bd7a133..b65c1914d6fde 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" #include "llvm/Support/Casting.h" @@ -6643,3 +6644,178 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { return false; } + +/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2) +/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2) +/// into a single comparison using range-based reasoning. +/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges. +bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, + BuildFnTy &MatchInfo) { + assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor"); + bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND; + Register DstReg = Logic->getReg(0); + Register LHS = Logic->getLHSReg(); + Register RHS = Logic->getRHSReg(); + unsigned Flags = Logic->getFlags(); + + // We need a G_ICMP on the LHS register. + GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI); + if (!Cmp1) + return false; + + // We need a G_ICMP on the RHS register. 
+ GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI); + if (!Cmp2) + return false; + + APInt C1; + APInt C2; + std::optional<ValueAndVReg> MaybeC1 = + getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI); + if (!MaybeC1) + return false; + C1 = MaybeC1->Value; + + std::optional<ValueAndVReg> MaybeC2 = + getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI); + if (!MaybeC2) + return false; + C2 = MaybeC2->Value; + + Register R1 = Cmp1->getLHSReg(); + Register R2 = Cmp2->getLHSReg(); + CmpInst::Predicate Pred1 = Cmp1->getCond(); + CmpInst::Predicate Pred2 = Cmp2->getCond(); + LLT CmpTy = MRI.getType(Cmp1->getReg(0)); + LLT CmpOperandTy = MRI.getType(R1); + + // We build ands, adds, and constants of type CmpOperandTy. + // They must be legal to build. + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, CmpOperandTy})) + return false; + + // Look through add of a constant offset on R1, R2, or both operands. This + // allows us to interpret the R + C' < C'' range idiom into a proper range. + std::optional<APInt> Offset1; + std::optional<APInt> Offset2; + if (R1 != R2) { + if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) { + std::optional<ValueAndVReg> MaybeOffset1 = + getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI); + if (MaybeOffset1) { + R1 = Add->getLHSReg(); + Offset1 = MaybeOffset1->Value; + } + } + if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) { + std::optional<ValueAndVReg> MaybeOffset2 = + getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI); + if (MaybeOffset2) { + R2 = Add->getLHSReg(); + Offset2 = MaybeOffset2->Value; + } + } + } + + if (R1 != R2) + return false; + + // We calculate the icmp ranges including maybe offsets. + ConstantRange CR1 = ConstantRange::makeExactICmpRegion( + IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1); + if (Offset1) + CR1 = CR1.subtract(*Offset1); + + ConstantRange CR2 = ConstantRange::makeExactICmpRegion( + IsAnd ? 
ICmpInst::getInversePredicate(Pred2) : Pred2, C2); + if (Offset2) + CR2 = CR2.subtract(*Offset2); + + bool CreateMask = false; + APInt LowerDiff; + std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2); + if (!CR) { + // We want to fold the icmps. + if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) || + !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) || CR1.isWrappedSet() || + CR2.isWrappedSet()) + return false; + + // Check whether we have equal-size ranges that only differ by one bit. + // In that case we can apply a mask to map one range onto the other. + LowerDiff = CR1.getLower() ^ CR2.getLower(); + APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1); + APInt CR1Size = CR1.getUpper() - CR1.getLower(); + if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff || + CR1Size != CR2.getUpper() - CR2.getLower()) + return false; + + CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2; + CreateMask = true; + } + + if (IsAnd) + CR = CR->inverse(); + + CmpInst::Predicate NewPred; + APInt NewC, Offset; + CR->getEquivalentICmp(NewPred, NewC, Offset); + + // We take the result type of one of the original icmps, CmpTy, for + // the icmp to be built. The operand type, CmpOperandTy, is used for + // the other instructions and constants to be built. The types of + // the parameters and output are the same for G_ADD and G_AND. CmpTy + // and the type of DstReg might differ. That is why we zext or trunc + // the icmp into the destination register. + + MatchInfo = [=](MachineIRBuilder &B) { + if (CreateMask && Offset != 0) { + auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff); + auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask. 
+ auto OffsetC = B.buildConstant(CmpOperandTy, Offset); + auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags); + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else if (CreateMask && Offset == 0) { + auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff); + auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask. + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else if (!CreateMask && Offset != 0) { + auto OffsetC = B.buildConstant(CmpOperandTy, Offset); + auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags); + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else if (!CreateMask && Offset == 0) { + auto NewCon = B.buildConstant(CmpOperandTy, NewC); + auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon); + B.buildZExtOrTrunc(DstReg, ICmp); + } else { + assert(false && "unexpected configuration of CreateMask and Offset"); + } + }; + return true; +} + +bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) { + GAnd *And = cast<GAnd>(&MI); + + if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo)) + return true; + + return false; +} + +bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) { + GOr *Or = cast<GOr>(&MI); + + if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo)) + return true; + + return false; +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir new file mode 100644 index 0000000000000..b1e19e6e3ef29 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir @@ -0,0 +1,214 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -debugify-and-strip-all-safe 
-run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +--- +# icmp (x, 1) && icmp (x, 2) -> x +name: test_icmp_and_icmp +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_and_icmp + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %one:_(s64) = G_CONSTANT i64 1 + %two:_(s64) = G_CONSTANT i64 2 + %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one + %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two + %and:_(s1) = G_AND %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %and(s1) + $x0 = COPY %zext +... +--- +# multi use icmp (x, 1) && icmp (x, 2) -> x +name: multi_use_test_icmp_and_icmp +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: multi_use_test_icmp_and_icmp + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %cmp1:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: %cmp1zext:_(s64) = G_ZEXT %cmp1(s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + ; CHECK-NEXT: $x0 = COPY %cmp1zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %one:_(s64) = G_CONSTANT i64 1 + %two:_(s64) = G_CONSTANT i64 2 + %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one + %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two + %and:_(s1) = G_AND %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %and(s1) + %cmp1zext:_(s64) = G_ZEXT %cmp1(s1) + $x0 = COPY %zext + $x0 = COPY %cmp1zext +... 
+--- +# icmp (x, 1) && icmp (x, add(x, 2)) -> x +name: test_icmp_and_icmp_with_add +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_and_icmp_with_add + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %cmp1:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %cmp1(s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %one:_(s64) = G_CONSTANT i64 1 + %two:_(s64) = G_CONSTANT i64 2 + %add:_(s64) = G_ADD %0(s64), %two + %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one + %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %add + %and:_(s1) = G_AND %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %and(s1) + $x0 = COPY %zext +... +--- +# icmp (x, 1) && icmp (x, add(x, 2000)) -> x +name: test_icmp_or_icmp_with_add_2000 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_or_icmp_with_add_2000 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 -100 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %one:_(s64) = G_CONSTANT i64 -100 + %two:_(s64) = G_CONSTANT i64 2000 + %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one + %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two + %or:_(s1) = G_AND %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %or(s1) + $x0 = COPY %zext +... 
+--- +# icmp (x, -100) || icmp (x, 2000) -> x +name: test_icmp_or_icmp +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_or_icmp + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: %two:_(s64) = G_CONSTANT i64 2000 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), %two + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %one:_(s64) = G_CONSTANT i64 -100 + %two:_(s64) = G_CONSTANT i64 2000 + %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %one + %cmp2:_(s1) = G_ICMP intpred(ne), %0(s64), %two + %or:_(s1) = G_OR %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %or(s1) + $x0 = COPY %zext +... +--- +# offset icmp (x, -100) || icmp (x, 2000) -> x +name: test_icmp_or_icmp_offset +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_or_icmp_offset + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2001 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2101 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s64), [[C1]] + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %one:_(s64) = G_CONSTANT i64 -100 + %two:_(s64) = G_CONSTANT i64 2000 + %cmp1:_(s1) = G_ICMP intpred(slt), %0(s64), %one + %cmp2:_(s1) = G_ICMP intpred(sgt), %0(s64), %two + %or:_(s1) = G_OR %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %or(s1) + $x0 = COPY %zext +... 
+--- +# icmp (x, add(x, 9) || icmp (x, add(x, 2)) -> x +name: test_icmp_or_icmp_with_add_and_add +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_or_icmp_with_add_and_add + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %two:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: %add2:_(s64) = G_ADD [[COPY]], %two + ; CHECK-NEXT: %cmp2:_(s1) = G_ICMP intpred(ne), [[COPY1]](s64), %add2 + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %cmp2(s1) + ; CHECK-NEXT: $x0 = COPY %zext(s64) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %nine:_(s64) = G_CONSTANT i64 9 + %two:_(s64) = G_CONSTANT i64 2 + %add1:_(s64) = G_ADD %0(s64), %nine + %add2:_(s64) = G_ADD %0(s64), %two + %cmp1:_(s1) = G_ICMP intpred(eq), %0(s64), %add1 + %cmp2:_(s1) = G_ICMP intpred(ne), %1(s64), %add2 + %and:_(s1) = G_OR %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %and(s1) + $x0 = COPY %zext +... +--- +# icmp (x, 9) && icmp (x, 2)) -> x +# buildConstant 0 +# buildICmp ult, R1, NewC +# buildZExtOrTrunc -> COPY +# erase G_AND +# x > 9 && x < 2 => false +name: test_icmp_and_icmp_9_2 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_and_icmp_9_2 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $x0 = COPY [[C]](s64) + %0:_(s64) = COPY $x0 + %nine:_(s64) = G_CONSTANT i64 9 + %two:_(s64) = G_CONSTANT i64 2 + %cmp1:_(s1) = G_ICMP intpred(sgt), %0(s64), %nine + %cmp2:_(s1) = G_ICMP intpred(slt), %0(s64), %two + %and:_(s1) = G_AND %cmp1, %cmp2 + %zext:_(s64) = G_ZEXT %and(s1) + $x0 = COPY %zext +... 
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 446526986b883..5d3b2d3649e1b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -635,19 +635,7 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) { ; ; GISEL-LABEL: select_noccmp1: ; GISEL: ; %bb.0: -; GISEL-NEXT: cmp x0, #0 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp x0, #13 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: cmp x2, #2 -; GISEL-NEXT: cset w10, lt -; GISEL-NEXT: cmp x2, #4 -; GISEL-NEXT: cset w11, gt -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: and w9, w10, w11 -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: tst w8, #0x1 -; GISEL-NEXT: csel x0, xzr, x3, ne +; GISEL-NEXT: mov x0, x3 ; GISEL-NEXT: ret %c0 = icmp slt i64 %v1, 0 %c1 = icmp sgt i64 %v1, 13 @@ -677,11 +665,8 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { ; ; GISEL-LABEL: select_noccmp2: ; GISEL: ; %bb.0: -; GISEL-NEXT: cmp x0, #0 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp x0, #13 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: cmp x0, #14 +; GISEL-NEXT: cset w8, hs ; GISEL-NEXT: tst w8, #0x1 ; GISEL-NEXT: csel x0, xzr, x3, ne ; GISEL-NEXT: sbfx w8, w8, #0, #1 @@ -719,25 +704,14 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) { ; ; GISEL-LABEL: select_noccmp3: ; GISEL: ; %bb.0: -; GISEL-NEXT: cmp w0, #0 -; GISEL-NEXT: cset w8, lt -; GISEL-NEXT: cmp w0, #13 -; GISEL-NEXT: cset w9, gt -; GISEL-NEXT: cmp w0, #22 -; GISEL-NEXT: cset w10, lt -; GISEL-NEXT: cmp w0, #44 -; GISEL-NEXT: cset w11, gt -; GISEL-NEXT: cmp w0, #99 -; GISEL-NEXT: cset w12, eq +; GISEL-NEXT: mov w8, #99 ; =0x63 +; GISEL-NEXT: sub w9, w0, #45 +; GISEL-NEXT: mov w10, #-23 ; =0xffffffe9 ; GISEL-NEXT: cmp w0, #77 -; GISEL-NEXT: cset w13, eq -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: orr w9, w10, w11 -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: orr w9, w12, w13 -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: tst w8, 
#0x1 -; GISEL-NEXT: csel w0, w1, w2, ne +; GISEL-NEXT: ccmp w0, w8, #4, ne +; GISEL-NEXT: ccmp w9, w10, #2, eq +; GISEL-NEXT: ccmp w0, #14, #0, lo +; GISEL-NEXT: csel w0, w1, w2, hs ; GISEL-NEXT: ret %c0 = icmp slt i32 %v0, 0 %c1 = icmp sgt i32 %v0, 13 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index d74948a460c98..faa8257bb52e9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -183,12 +183,10 @@ define i1 @snan_f16(half %x) nounwind { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x1ff +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: snan_f16: @@ -894,11 +892,9 @@ define i1 @not_isnan_f16(half %x) { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c01 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: not_isnan_f16: @@ -1539,10 +1535,8 @@ define i1 
@not_issubnormal_or_zero_f16(half %x) { ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v1, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 +; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 @@ -2100,11 +2094,9 @@ define i1 @ispositive_f16(half %x) { ; GFX7GLISEL: ; %bb.0: ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c01 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: ispositive_f16: @@ -2291,13 +2283,12 @@ define i1 @not_isnegative_f16(half %x) { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c01 ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v2 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 +; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2352,11 
+2343,10 @@ define i1 @iszero_or_nan_f16(half %x) { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8400 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: iszero_or_nan_f16: @@ -2411,11 +2401,10 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8400 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: iszero_or_nan_f_daz: @@ -2470,11 +2459,10 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; 
GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8400 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: iszero_or_nan_f_maybe_daz: @@ -2730,11 +2718,10 @@ define i1 @iszero_or_qnan_f16(half %x) { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff8200, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0xffff8201 +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: iszero_or_qnan_f16: @@ -2792,13 +2779,11 @@ define i1 @iszero_or_snan_f16(half %x) { ; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[6:7], v0, v1 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX7GLISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX7GLISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffff83ff, v0 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x1ff +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2872,15 +2857,14 @@ define i1 
@not_iszero_or_qnan_f16(half %x) { ; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00 ; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2 -; GFX7GLISEL-NEXT: s_or_b64 s[6:7], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v1, v2 -; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2 -; GFX7GLISEL-NEXT: s_and_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX7GLISEL-NEXT: v_add_i32_e32 v1, vcc, 0xffff83ff, v1 +; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x1ff +; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2 +; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7800 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] ; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 ; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -3017,10 +3001,8 @@ define i1 @isinf_or_nan_f16(half %x) { ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: isinf_or_nan_f16: @@ -3129,10 +3111,8 @@ define i1 @isfinite_or_nan_f(half %x) { ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00 -; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1 -; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1 -; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GFX7GLISEL-NEXT: v_cndmask_b32_e64 
v0, 0, 1, s[4:5] +; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1 +; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX8CHECK-LABEL: isfinite_or_nan_f: From debb70578b674576876755a6ca7bd8cdc3fa7798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Fri, 12 Jan 2024 08:50:43 +0100 Subject: [PATCH 2/4] vector test --- .../GlobalISel/combine-logic-of-compare.mir | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir index b1e19e6e3ef29..f0c4ef477bdc1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir @@ -212,3 +212,49 @@ body: | %zext:_(s64) = G_ZEXT %and(s1) $x0 = COPY %zext ... +--- +# icmp (v1, v2) && icmp (v3, v4) on non-constant vectors -> not combined +name: test_icmp_and_icmp_with_vectors +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_icmp_and_icmp_with_vectors + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7 + ; CHECK-NEXT: %v1:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: %v2:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK-NEXT: %v3:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK-NEXT: %v4:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) + ; CHECK-NEXT: %cmp1:_(<2 x s1>) = G_ICMP intpred(ne), %v1(<2 x s64>), %v2 + ; CHECK-NEXT: %cmp2:_(<2 x s1>) = G_ICMP intpred(eq), 
%v3(<2 x s64>), %v4 + ; CHECK-NEXT: %and:_(<2 x s1>) = G_AND %cmp1, %cmp2 + ; CHECK-NEXT: %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>) + ; CHECK-NEXT: $q0 = COPY %zext(<2 x s64>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = COPY $x3 + %4:_(s64) = COPY $x4 + %5:_(s64) = COPY $x5 + %6:_(s64) = COPY $x6 + %7:_(s64) = COPY $x7 + %nine:_(s64) = G_CONSTANT i64 9 + %two:_(s64) = G_CONSTANT i64 2 + %v1:_(<2 x s64>) = G_BUILD_VECTOR %0(s64), %1(s64) + %v2:_(<2 x s64>) = G_BUILD_VECTOR %2(s64), %3(s64) + %v3:_(<2 x s64>) = G_BUILD_VECTOR %4(s64), %5(s64) + %v4:_(<2 x s64>) = G_BUILD_VECTOR %6(s64), %7(s64) + %cmp1:_(<2 x s1>) = G_ICMP intpred(ne), %v1(<2 x s64>), %v2 + %cmp2:_(<2 x s1>) = G_ICMP intpred(eq), %v3(<2 x s64>), %v4 + %and:_(<2 x s1>) = G_AND %cmp1, %cmp2 + %zext:_(<2 x s64>) = G_ZEXT %and(<2 x s1>) + $q0 = COPY %zext +... From 86e90a99a3254009e7893b2adffe0fd88f20625c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Wed, 17 Jan 2024 12:19:41 +0100 Subject: [PATCH 3/4] address review comments --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 13 ++++++++----- .../AArch64/GlobalISel/combine-logic-of-compare.mir | 6 ++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b65c1914d6fde..cf742450af391 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6668,6 +6668,11 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, if (!Cmp2) return false; + // We want to fold the icmps. + if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) || + !MRI.hasOneNonDBGUse(Cmp2->getReg(0))) + return false; + APInt C1; APInt C2; std::optional MaybeC1 = @@ -6693,7 +6698,7 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, // They must be legal to build. 
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) || !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) || - !isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, CmpOperandTy})) + !isConstantLegalOrBeforeLegalizer(CmpOperandTy)) return false; // Look through add of a constant offset on R1, R2, or both operands. This @@ -6737,10 +6742,8 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, APInt LowerDiff; std::optional CR = CR1.exactUnionWith(CR2); if (!CR) { - // We want to fold the icmps. - if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) || - !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) || CR1.isWrappedSet() || - CR2.isWrappedSet()) + // We need non-wrapping ranges. + if (CR1.isWrappedSet() || CR2.isWrappedSet()) return false; // Check whether we have equal-size ranges that only differ by one bit. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir index f0c4ef477bdc1..f667a83bf21a8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-logic-of-compare.mir @@ -36,9 +36,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %two:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: %cmp1:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), %one - ; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: %cmp2:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), %two + ; CHECK-NEXT: %and:_(s1) = G_AND %cmp1, %cmp2 + ; CHECK-NEXT: %zext:_(s64) = G_ZEXT %and(s1) ; CHECK-NEXT: %cmp1zext:_(s64) = G_ZEXT %cmp1(s1) ; CHECK-NEXT: $x0 = COPY %zext(s64) ; CHECK-NEXT: $x0 = COPY %cmp1zext(s64) From ea95b43c25cf8721755255da92891f8a654acd4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 6 Feb 2024 
14:26:24 +0100 Subject: [PATCH 4/4] llvm_unreachable --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index cf742450af391..9418f1ff2bbcd 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DivisionByConstantInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #include @@ -6799,7 +6800,7 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon); B.buildZExtOrTrunc(DstReg, ICmp); } else { - assert(false && "unexpected configuration of CreateMask and Offset"); + llvm_unreachable("unexpected configuration of CreateMask and Offset"); } }; return true;