Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
return nullptr;
}

/// Returns true when \p IID names a binary intrinsic that is idempotent with
/// respect to a repeated operand, i.e. one for which
/// `f(f(x, y), y) == f(x, y)` holds.
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
  // Integer min/max family.
  const bool IsIntegerMinMax = IID == Intrinsic::smax ||
                               IID == Intrinsic::smin ||
                               IID == Intrinsic::umax ||
                               IID == Intrinsic::umin;

  // Floating-point min/max family.
  const bool IsFloatMinMax = IID == Intrinsic::maximum ||
                             IID == Intrinsic::minimum ||
                             IID == Intrinsic::maximumnum ||
                             IID == Intrinsic::minimumnum ||
                             IID == Intrinsic::maxnum ||
                             IID == Intrinsic::minnum;

  return IsIntegerMinMax || IsFloatMinMax;
}

/// Attempt to simplify value-accumulating recurrences of kind:
/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
/// And let the idempotent binary intrinsic be hoisted, when the operands are
/// known to be loop-invariant.
static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
IntrinsicInst *II) {
PHINode *PN;
Value *Init, *OtherOp;

// A binary intrinsic recurrence with loop-invariant operands is equivalent to
// `call @llvm.binary.intrinsic(Init, OtherOp)`.
auto IID = II->getIntrinsicID();
if (!isIdempotentBinaryIntrinsic(IID) ||
!matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
!IC.getDominatorTree().dominates(OtherOp, PN))
return nullptr;

auto *InvariantBinaryInst =
IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
if (isa<FPMathOperator>(InvariantBinaryInst))
cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
Comment on lines +1573 to +1576
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto *InvariantBinaryInst =
IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
if (isa<FPMathOperator>(InvariantBinaryInst))
cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
auto *InvariantBinaryInst =
IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp, /*FMFSource=*/II);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's unfortunately not possible: it trips the assertion `isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"` on non-fast-math binary intrinsics. Perhaps we may want to change this in IRBuilder intrinsic creation in the future.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use dyn_cast<FPMathOperator>(II)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, I think you'd need another dyn_cast<Instruction>(InvariantBinaryInst) to check that CreateBinaryIntrinsic didn't constant-fold, as copyFastMathFlags is private to FPMathOperator. I think the current version may be cleaner.

return InvariantBinaryInst;
}

static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
if (!CanReorderLanes)
return nullptr;
Expand Down Expand Up @@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *Reverse = foldReversedIntrinsicOperands(II))
return replaceInstUsesWith(*II, Reverse);

if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II))
return replaceInstUsesWith(*II, Res);

// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,11 @@ define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximumnum.f32(float [[A]], float [[B]])
; CHECK-NEXT: ret float [[FMAX]]
;
entry:
Expand All @@ -265,12 +264,11 @@ define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[FMIN:%.*]] = call nnan float @llvm.minimumnum.f32(float [[A]], float [[B]])
; CHECK-NEXT: ret float [[FMIN]]
;
entry:
Expand All @@ -296,7 +294,7 @@ define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) {
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-NEXT: call void @use(i8 [[UMAX_ACC]])
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
Expand Down
Loading