diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 47e017e17092b..d7a2ef722c846 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V, return nullptr; } +/// Helper to match idempotent binary intrinsics, namely, intrinsics where +/// `f(f(x, y), y) == f(x, y)` holds. +static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) { + switch (IID) { + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: + case Intrinsic::maximum: + case Intrinsic::minimum: + case Intrinsic::maximumnum: + case Intrinsic::minimumnum: + case Intrinsic::maxnum: + case Intrinsic::minnum: + return true; + default: + return false; + } +} + +/// Attempt to simplify value-accumulating recurrences of kind: +/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ] +/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b) +/// And let the idempotent binary intrinsic be hoisted, when the operands are +/// known to be loop-invariant. +static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, + IntrinsicInst *II) { + PHINode *PN; + Value *Init, *OtherOp; + + // A binary intrinsic recurrence with loop-invariant operands is equivalent to + // `call @llvm.binary.intrinsic(Init, OtherOp)`. 
+ auto IID = II->getIntrinsicID(); + if (!isIdempotentBinaryIntrinsic(IID) || + !matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) || + !IC.getDominatorTree().dominates(OtherOp, PN)) + return nullptr; + + auto *InvariantBinaryInst = + IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp); + if (isa<FPMathOperator>(InvariantBinaryInst)) + cast<FPMathOperator>(InvariantBinaryInst)->copyFastMathFlags(II); + return InvariantBinaryInst; +} + static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) { if (!CanReorderLanes) return nullptr; @@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *Reverse = foldReversedIntrinsicOperands(II)) return replaceInstUsesWith(*II, Reverse); + if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II)) + return replaceInstUsesWith(*II, Res); + // Some intrinsics (like experimental_gc_statepoint) can be used in invoke // context, so it is handled in visitCallBase and we should trigger it. return visitCallBase(*II); diff --git a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll index 86e586ef0a16c..a4e247efc4d23 100644 --- a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll @@ -236,12 +236,11 @@ define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b) ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] -; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: +; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximumnum.f32(float [[A]], float [[B]]) ; CHECK-NEXT: ret float [[FMAX]] ; entry: @@ -265,12 +264,11 @@ define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b) ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] -; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: +; CHECK-NEXT: [[FMIN:%.*]] = call nnan float @llvm.minimumnum.f32(float [[A]], float [[B]]) ; CHECK-NEXT: ret float [[FMIN]] ; entry: @@ -296,7 +294,7 @@ define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) { ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] ; CHECK-NEXT: call void @use(i8 [[UMAX_ACC]]) -; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]]) +; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]