From 3730790a9f5d96a31346f25db1e9e1f18982c0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 15 Sep 2024 15:36:11 +0200 Subject: [PATCH 1/3] [GlobalIsel] Canonicalize G_ICMP As a side-effect, we start constant folding icmps. --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++ .../CodeGen/GlobalISel/GenericMachineInstrs.h | 10 ++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 22 +++++ .../include/llvm/Target/GlobalISel/Combine.td | 25 +++-- llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 + .../GlobalISel/CombinerHelperCompares.cpp | 86 +++++++++++++++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 40 ++++++++ .../AArch64/GlobalISel/combine-visit-icmp.mir | 95 +++++++++++++++++++ llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 60 ++++-------- .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 18 ++-- llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 46 ++++----- 11 files changed, 328 insertions(+), 81 deletions(-) create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 828532dcffb7d..37c9422d19275 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/Register.h" #include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/InstrTypes.h" @@ -909,6 +910,8 @@ class CombinerHelper { bool matchCastOfBuildVector(const MachineInstr &CastMI, const MachineInstr &BVMI, BuildFnTy &MatchInfo); + bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. 
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; @@ -1023,6 +1026,9 @@ class CombinerHelper { bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo); bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const; + + bool constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst, + const GIConstant &RHSCst, BuildFnTy &MatchInfo); }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index ef1171d9f1f64..2c459ccdd8a73 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -950,6 +950,16 @@ class GExtOrTruncOp : public GCastOp { }; }; +/// Represents a splat vector. +class GSplatVector : public GenericMachineInstr { +public: + Register getValueReg() const { return getOperand(1).getReg(); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR; + }; +}; + } // namespace llvm #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index cf5fd6d6f288b..4aeacfdf4ecdf 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -593,5 +593,27 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI, /// estimate of the type. Type *getTypeForLLT(LLT Ty, LLVMContext &C); +enum class GIConstantKind { Scalar, FixedVector, ScalableVector }; + +/// An integer-like constant. 
+class GIConstant { + GIConstantKind Kind; + SmallVector Values; + APInt Value; + +public: + GIConstant(ArrayRef Values) + : Kind(GIConstantKind::FixedVector), Values(Values) {}; + GIConstant(const APInt &Value, GIConstantKind Kind) + : Kind(Kind), Value(Value) {}; + + GIConstantKind getKind() const { return Kind; } + + APInt getScalarValue() const; + + static std::optional getConstant(Register Const, + const MachineRegisterInfo &MRI); +}; + } // End namespace llvm. #endif diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index a595a51d7b01f..c66212d2ab12c 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1007,9 +1007,6 @@ def double_icmp_zero_or_combine: GICombineRule< (G_ICMP $root, $p, $ordst, 0)) >; -def double_icmp_zero_and_or_combine : GICombineGroup<[double_icmp_zero_and_combine, - double_icmp_zero_or_combine]>; - def and_or_disjoint_mask : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_AND):$root, @@ -1918,6 +1915,20 @@ def cast_combines: GICombineGroup<[ integer_of_truncate ]>; +def canonicalize_icmp : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_ICMP $root, $pred, $lhs, $rhs):$cmp, + [{ return Helper.matchCanonicalizeICmp(*${cmp}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${cmp}, ${matchinfo}); }])>; + +def icmp_combines: GICombineGroup<[ + canonicalize_icmp, + icmp_to_true_false_known_bits, + icmp_to_lhs_known_bits, + double_icmp_zero_and_combine, + double_icmp_zero_or_combine, + redundant_binop_in_equality +]>; // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, @@ -1951,7 +1962,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p, def known_bits_simplifications : GICombineGroup<[ redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask, - zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits, + zext_trunc_fold, sext_inreg_to_zext_inreg]>; def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, @@ -1984,7 +1995,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, combine_extracted_vector_load, undef_combines, identity_combines, phi_combines, simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big, - reassocs, ptr_add_immed_chain, + reassocs, ptr_add_immed_chain, icmp_combines, shl_ashr_to_sext_inreg, sext_inreg_of_load, width_reduction_combines, select_combines, known_bits_simplifications, @@ -1998,9 +2009,9 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, constant_fold_cast_op, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, - sub_add_reg, select_to_minmax, redundant_binop_in_equality, + sub_add_reg, select_to_minmax, fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, - combine_concat_vector, double_icmp_zero_and_or_combine, match_addos, + combine_concat_vector, match_addos, sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>; // A combine group used to for prelegalizer combiners at -O0. 
The combines in diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt index a15b76440364b..af1717dbf76f3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMGlobalISel Combiner.cpp CombinerHelper.cpp CombinerHelperCasts.cpp + CombinerHelperCompares.cpp CombinerHelperVectorOps.cpp GIMatchTableExecutor.cpp GISelChangeObserver.cpp diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp new file mode 100644 index 0000000000000..39ac4ee205fb7 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -0,0 +1,86 @@ +//===- CombinerHelperCompares.cpp------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements CombinerHelper for G_ICMP. 
+// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include + +#define DEBUG_TYPE "gi-combiner" + +using namespace llvm; + +bool CombinerHelper::constantFoldICmp(const GICmp &ICmp, + const GIConstant &LHSCst, + const GIConstant &RHSCst, + BuildFnTy &MatchInfo) { + if (LHSCst.getKind() != GIConstantKind::Scalar) + return false; + + Register Dst = ICmp.getReg(0); + LLT DstTy = MRI.getType(Dst); + + if (!isConstantLegalOrBeforeLegalizer(DstTy)) + return false; + + CmpInst::Predicate Pred = ICmp.getCond(); + APInt LHS = LHSCst.getScalarValue(); + APInt RHS = RHSCst.getScalarValue(); + + bool Result = ICmpInst::compare(LHS, RHS, Pred); + + MatchInfo = [=](MachineIRBuilder &B) { + if (Result) + B.buildConstant(Dst, getICmpTrueVal(getTargetLowering(), + /*IsVector=*/DstTy.isVector(), + /*IsFP=*/false)); + else + B.buildConstant(Dst, 0); + }; + + return true; +} + +bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI, + BuildFnTy &MatchInfo) { + const GICmp *Cmp = cast(&MI); + + Register Dst = Cmp->getReg(0); + Register LHS = Cmp->getLHSReg(); + Register RHS = Cmp->getRHSReg(); + + CmpInst::Predicate Pred = Cmp->getCond(); + assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); + if (auto CLHS = GIConstant::getConstant(LHS, MRI)) { + if (auto CRHS = GIConstant::getConstant(RHS, 
MRI)) + return constantFoldICmp(*Cmp, *CLHS, *CRHS, MatchInfo); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + + MatchInfo = [=](MachineIRBuilder &B) { B.buildICmp(Pred, Dst, LHS, RHS); }; + return true; + } + + return false; +} diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1713a582d5cfe..396f0f07ae905 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1968,3 +1968,43 @@ Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) { Ty.getElementCount()); return IntegerType::get(C, Ty.getSizeInBits()); } + +APInt llvm::GIConstant::getScalarValue() const { + assert(Kind == GIConstantKind::Scalar && "Expected scalar constant"); + + return Value; +} + +std::optional +llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) { + MachineInstr *Constant = getDefIgnoringCopies(Const, MRI); + + if (GSplatVector *Splat = dyn_cast(Constant)) { + std::optional MayBeConstant = + getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI); + if (!MayBeConstant) + return std::nullopt; + return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector); + } + + if (GBuildVector *Build = dyn_cast(Constant)) { + SmallVector Values; + unsigned NumSources = Build->getNumSources(); + for (unsigned I = 0; I < NumSources; ++I) { + Register SrcReg = Build->getSourceReg(I); + std::optional MayBeConstant = + getIConstantVRegValWithLookThrough(SrcReg, MRI); + if (!MayBeConstant) + return std::nullopt; + Values.push_back(MayBeConstant->Value); + } + return GIConstant(Values); + } + + std::optional MayBeConstant = + getIConstantVRegValWithLookThrough(Const, MRI); + if (!MayBeConstant) + return std::nullopt; + + return GIConstant(MayBeConstant->Value, GIConstantKind::Scalar); +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir new file mode 100644 index 0000000000000..bf04ac02d086a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK + +--- +name: test_icmp_canon +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_canon + ; CHECK: %lhs:_(s64) = G_CONSTANT i64 11 + ; CHECK-NEXT: %rhs:_(s64) = COPY $x0 + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(sgt), %rhs(s64), %lhs + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = G_CONSTANT i64 11 + %rhs:_(s64) = COPY $x0 + %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... +--- +name: test_icmp_no_canon +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_no_canon + ; CHECK: %lhs:_(s64) = COPY $x0 + ; CHECK-NEXT: %rhs:_(s64) = G_CONSTANT i64 11 + ; CHECK-NEXT: %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs + ; CHECK-NEXT: $w0 = COPY %res(s32) + %lhs:_(s64) = COPY $x0 + %rhs:_(s64) = G_CONSTANT i64 11 + %res:_(s32) = G_ICMP intpred(slt), %lhs(s64), %rhs + $w0 = COPY %res(s32) +... 
+--- +name: test_icmp_canon_bv +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_canon_bv + ; CHECK: %opaque1:_(s64) = COPY $x0 + ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0 + ; CHECK-NEXT: %const1:_(s64) = G_CONSTANT i64 11 + ; CHECK-NEXT: %const2:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64) + ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64) + ; CHECK-NEXT: %res:_(<2 x s32>) = G_ICMP intpred(sgt), %rhs(<2 x s64>), %lhs + ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>) + %opaque1:_(s64) = COPY $x0 + %opaque2:_(s64) = COPY $x0 + %const1:_(s64) = G_CONSTANT i64 11 + %const2:_(s64) = G_CONSTANT i64 12 + %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %const2(s64) + %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %opaque2(s64) + %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs + $x0 = COPY %res(<2 x s32>) +... +--- +name: test_icmp_no_canon_bv_neither_const +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_no_canon_bv_neither_const + ; CHECK: %opaque1:_(s64) = COPY $x0 + ; CHECK-NEXT: %opaque2:_(s64) = COPY $x0 + ; CHECK-NEXT: %const1:_(s64) = G_CONSTANT i64 11 + ; CHECK-NEXT: %const2:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64) + ; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64) + ; CHECK-NEXT: %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs + ; CHECK-NEXT: $x0 = COPY %res(<2 x s32>) + %opaque1:_(s64) = COPY $x0 + %opaque2:_(s64) = COPY $x0 + %const1:_(s64) = G_CONSTANT i64 11 + %const2:_(s64) = G_CONSTANT i64 12 + %lhs:_(<2 x s64>) = G_BUILD_VECTOR %const1(s64), %opaque2(s64) + %rhs:_(<2 x s64>) = G_BUILD_VECTOR %opaque1(s64), %const2(s64) + %res:_(<2 x s32>) = G_ICMP intpred(slt), %lhs(<2 x s64>), %rhs + $x0 = COPY %res(<2 x s32>) +... 
+--- +name: test_icmp_canon_splat +body: | + bb.1: + ; CHECK-LABEL: name: test_icmp_canon_splat + ; CHECK: %const:_(s64) = G_CONSTANT i64 11 + ; CHECK-NEXT: %lhs:_() = G_SPLAT_VECTOR %const(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %rhs:_() = G_SPLAT_VECTOR [[COPY]](s64) + ; CHECK-NEXT: %res:_() = G_ICMP intpred(sgt), %rhs(), %lhs + ; CHECK-NEXT: %z:_() = G_ZEXT %res() + ; CHECK-NEXT: $z0 = COPY %z() + %const:_(s64) = G_CONSTANT i64 11 + %lhs:_() = G_SPLAT_VECTOR %const:_(s64) + %1:_(s64) = COPY $x1 + %rhs:_() = G_SPLAT_VECTOR %1:_(s64) + %res:_() = G_ICMP intpred(slt), %lhs(), %rhs + %z:_() = G_ZEXT %res + $z0 = COPY %z() +... diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 50afc79a5a576..06e957fdcc6a2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -457,20 +457,12 @@ sw.bb.i.i: } define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_and: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #0, ne -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_and: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #0, ne -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #0, ne +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = and i1 %1, %2 @@ -479,20 +471,12 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { -; SDISEL-LABEL: select_or: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: csel x0, x2, x3, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: 
cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: csel x0, x2, x3, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 @@ -501,20 +485,12 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { } define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) { -; SDISEL-LABEL: select_or_float: -; SDISEL: ; %bb.0: -; SDISEL-NEXT: cmp w1, #5 -; SDISEL-NEXT: ccmp w0, w1, #8, eq -; SDISEL-NEXT: fcsel s0, s0, s1, lt -; SDISEL-NEXT: ret -; -; GISEL-LABEL: select_or_float: -; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 ; =0x5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: fcsel s0, s0, s1, lt -; GISEL-NEXT: ret +; CHECK-LABEL: select_or_float: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: fcsel s0, s0, s1, lt +; CHECK-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 %3 = or i1 %1, %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index b1cdf553b7242..0b66185d25f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -253,7 +253,7 @@ define double @v_rcp_f64(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -346,7 +346,7 @@ define double @v_rcp_f64_arcp(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 
0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -483,7 +483,7 @@ define double @v_rcp_f64_ulp25(double %x) { ; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 ; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] @@ -1115,7 +1115,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1126,7 +1126,7 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1275,7 +1275,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; 
GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1286,7 +1286,7 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 @@ -1502,7 +1502,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 ; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v9 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v9, v20 ; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[18:19], -v[4:5], v[12:13], v[8:9] @@ -1513,7 +1513,7 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6-NEXT: v_mul_f64 v[8:9], v[16:17], v[4:5] ; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[18:19], v[6:7], v[12:13] ; GFX6-NEXT: v_fma_f64 v[12:13], -v[10:11], v[8:9], v[16:17] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v20, v17 +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v17, v20 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll index bd6e1f54e636d..8f4a4b5afcdc1 100644 --- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll @@ -87,7 +87,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 
1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -267,7 +267,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_fabs(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -448,7 +448,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -628,7 +628,7 @@ define amdgpu_ps <2 x i32> @s_neg_rsq_neg_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -809,7 +809,7 @@ define double @v_rsq_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: 
v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -978,7 +978,7 @@ define double @v_rsq_f64_fabs(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1148,7 +1148,7 @@ define double @v_rsq_f64_missing_contract0(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1317,7 +1317,7 @@ define double @v_rsq_f64_missing_contract1(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1486,7 +1486,7 @@ define double @v_neg_rsq_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: 
v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -1716,7 +1716,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7] ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9] ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13] ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] @@ -1728,7 +1728,7 @@ define <2 x double> @v_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7] ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15] ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 @@ -2019,7 +2019,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[6:7] ; SI-GISEL-NEXT: v_rcp_f64_e32 v[6:7], v[8:9] ; SI-GISEL-NEXT: v_mul_f64 v[14:15], v[12:13], v[4:5] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v20 ; SI-GISEL-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], v[12:13] ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] @@ -2031,7 +2031,7 @@ define <2 x double> @v_neg_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[18:19], v[6:7] ; SI-GISEL-NEXT: v_div_fmas_f64 
v[4:5], v[16:17], v[4:5], v[14:15] ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v20, v19 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v20 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0 @@ -2293,7 +2293,7 @@ define <2 x double> @v_neg_rsq_v2f64_poisonelt(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], s[4:5], v[2:3], s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] @@ -2568,7 +2568,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[18:19], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[18:19], v[6:7] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v13 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v13, v10 ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[6:7], 1.0 ; SI-GISEL-NEXT: v_div_scale_f64 v[18:19], s[4:5], 1.0, v[2:3], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] @@ -2578,7 +2578,7 @@ define <2 x double> @v_neg_pos_rsq_v2f64(<2 x double> %x) { ; SI-GISEL-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], v[18:19] ; SI-GISEL-NEXT: v_mov_b32_e32 v8, 0x3ff00000 ; SI-GISEL-NEXT: v_div_fmas_f64 v[4:5], v[16:17], v[4:5], v[14:15] -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v19 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v19, v8 ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], -1.0 @@ -2808,7 +2808,7 @@ define double 
@v_rsq_f64_fneg_fabs(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -2979,7 +2979,7 @@ define double @v_rsq_f64__afn_sqrt(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -4167,7 +4167,7 @@ define double @v_rsq_f64__nnan_ninf(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -4846,7 +4846,7 @@ define double @v_rsq_amdgcn_sqrt_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -4927,7 +4927,7 @@ define double @v_neg_rsq_amdgcn_sqrt_f64(double %x) { ; SI-GISEL-NEXT: 
v_div_scale_f64 v[8:9], s[4:5], -1.0, v[0:1], -1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -5008,7 +5008,7 @@ define amdgpu_ps <2 x i32> @s_rsq_amdgcn_sqrt_f64(double inreg %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[0:1], 1.0, v[0:1], 1.0 ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[0:1] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] @@ -5649,7 +5649,7 @@ define double @v_div_const_contract_sqrt_f64(double %x) { ; SI-GISEL-NEXT: v_div_scale_f64 v[8:9], s[4:5], s[6:7], v[0:1], s[6:7] ; SI-GISEL-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; SI-GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 -; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 +; SI-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v10 ; SI-GISEL-NEXT: s_xor_b64 vcc, vcc, s[4:5] ; SI-GISEL-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 ; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] From 5493fe2883a557713165dfedec1a3406a8d40e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 16 Sep 2024 13:04:22 +0200 Subject: [PATCH 2/3] address review comments --- .../llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 2 +- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 13 +++++++++++-- .../CodeGen/GlobalISel/CombinerHelperCompares.cpp | 2 +- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +- ...visit-icmp.mir => combine-canonicalize-icmp.mir} | 0 5 files changed, 14 insertions(+), 5 deletions(-) rename 
llvm/test/CodeGen/AArch64/GlobalISel/{combine-visit-icmp.mir => combine-canonicalize-icmp.mir} (100%) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 2c459ccdd8a73..09426150eabc7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -953,7 +953,7 @@ class GExtOrTruncOp : public GCastOp { /// Represents a splat vector. class GSplatVector : public GenericMachineInstr { public: - Register getValueReg() const { return getOperand(1).getReg(); } + Register getScalarReg() const { return getOperand(1).getReg(); } static bool classof(const MachineInstr *MI) { return MI->getOpcode() == TargetOpcode::G_SPLAT_VECTOR; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 4aeacfdf4ecdf..c33a4800bc9d6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -593,10 +593,17 @@ bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI, /// estimate of the type. Type *getTypeForLLT(LLT Ty, LLVMContext &C); -enum class GIConstantKind { Scalar, FixedVector, ScalableVector }; - /// An integer-like constant. +/// +/// It abstracts over scalar, fixed-length vectors, and scalable vectors. +/// In the common case, it provides a common API and feels like an APInt, +/// while still providing low-level access. +/// It can be used for constant-folding. class GIConstant { +public: + enum class GIConstantKind { Scalar, FixedVector, ScalableVector }; + +private: GIConstantKind Kind; SmallVector<APInt> Values; APInt Value; @@ -607,8 +614,10 @@ class GIConstant { GIConstant(const APInt &Value, GIConstantKind Kind) : Kind(Kind), Value(Value) {}; + /// Returns the kind of this constant, e.g., Scalar. GIConstantKind getKind() const { return Kind; } + /// Returns the value, if this constant is a scalar.
APInt getScalarValue() const; static std::optional<GIConstant> getConstant(Register Const, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index 39ac4ee205fb7..025cd2dc9f87f 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -33,7 +33,7 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst, const GIConstant &RHSCst, BuildFnTy &MatchInfo) { - if (LHSCst.getKind() != GIConstantKind::Scalar) + if (LHSCst.getKind() != GIConstant::GIConstantKind::Scalar) return false; Register Dst = ICmp.getReg(0); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 396f0f07ae905..15d3aa427d568 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1981,7 +1981,7 @@ llvm::GIConstant::getConstant(Register Const, const MachineRegisterInfo &MRI) { if (GSplatVector *Splat = dyn_cast<GSplatVector>(Constant)) { std::optional<ValueAndVReg> MayBeConstant = - getIConstantVRegValWithLookThrough(Splat->getValueReg(), MRI); + getIConstantVRegValWithLookThrough(Splat->getScalarReg(), MRI); if (!MayBeConstant) return std::nullopt; return GIConstant(MayBeConstant->Value, GIConstantKind::ScalableVector); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-canonicalize-icmp.mir similarity index 100% rename from llvm/test/CodeGen/AArch64/GlobalISel/combine-visit-icmp.mir rename to llvm/test/CodeGen/AArch64/GlobalISel/combine-canonicalize-icmp.mir From 067c8de9cd63133a1ad0b1debf2ba45de2815572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Mon, 16 Sep 2024 17:49:10 +0200 Subject: [PATCH 3/3] extend docstring --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index c33a4800bc9d6..76e0954357a5d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -599,6 +599,14 @@ Type *getTypeForLLT(LLT Ty, LLVMContext &C); /// In the common case, it provides a common API and feels like an APInt, /// while still providing low-level access. /// It can be used for constant-folding. +/// +/// bool isZero() +/// abstracts over the kind. +/// +/// switch(const.getKind()) +/// { +/// } +/// provides low-level access. class GIConstant { public: enum class GIConstantKind { Scalar, FixedVector, ScalableVector };