Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b0a057b
[InstSimplify] Optimize maximumnum and minimumnum
LewisCrawford May 13, 2025
9c05591
Add more vector tests + cleanup
LewisCrawford May 13, 2025
35b7d08
Add vector tests for all inf/max tests
LewisCrawford May 13, 2025
aa652bf
Avoid optimization for maxnum(x, <sNaN, qNaN>)
LewisCrawford May 13, 2025
65c4e84
Update failing AMDGPU tests
LewisCrawford May 13, 2025
a80f4f0
Tidy up if/else chain
LewisCrawford May 15, 2025
4e7a52e
Rewrite fmax/fmin InstSimplify elementwise
LewisCrawford May 30, 2025
97fa626
Remove unused pattern match functions
LewisCrawford Jul 3, 2025
7b1b750
Fix AMDGPU tests
LewisCrawford Jul 3, 2025
75cff98
Avoid elementwise testing for failed splats
LewisCrawford Jul 3, 2025
23c002e
Fix comment consistency
LewisCrawford Aug 18, 2025
39c6a7f
Merge branch 'main' into instsimplify_maximumnum
LewisCrawford Aug 18, 2025
12c05ac
Refactor to simplify
LewisCrawford Aug 20, 2025
d2d307f
Merge remote-tracking branch 'origin/main' into instsimplify_maximumnum
LewisCrawford Sep 25, 2025
08b0df5
Regenerate test results
LewisCrawford Sep 25, 2025
349bbb3
Update test comments (remove TODOs)
LewisCrawford Sep 25, 2025
d3c2f6f
Rename some CHECK variables for consistency
LewisCrawford Sep 25, 2025
989bd58
Add new mixed vector element tests
LewisCrawford Sep 25, 2025
18fc2ba
Fix more variable naming inconsistencies
LewisCrawford Sep 25, 2025
fd1d020
Fix more variable naming issues
LewisCrawford Sep 25, 2025
216a740
Merge branch 'main' into instsimplify_maximumnum
LewisCrawford Sep 25, 2025
2bc2b18
Fix unused variable error
LewisCrawford Sep 25, 2025
06e3062
Merge branch 'main' into instsimplify_maximumnum
LewisCrawford Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 137 additions & 36 deletions llvm/lib/Analysis/InstructionSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6473,7 +6473,8 @@ static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) {
static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
Value *Op1) {
assert((IID == Intrinsic::maxnum || IID == Intrinsic::minnum ||
IID == Intrinsic::maximum || IID == Intrinsic::minimum) &&
IID == Intrinsic::maximum || IID == Intrinsic::minimum ||
IID == Intrinsic::maximumnum || IID == Intrinsic::minimumnum) &&
"Unsupported intrinsic");

auto *M0 = dyn_cast<IntrinsicInst>(Op0);
Expand Down Expand Up @@ -6512,6 +6513,82 @@ static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
return nullptr;
}

// Outcome of folding a float min/max intrinsic whose RHS operand is a
// constant (see OptimizeConstMinMax below).
enum class MinMaxOptResult {
  // No simplification is possible; keep the original call.
  CannotOptimize = 0,
  // Replace the call with a new constant value (written by the caller of
  // this enum's producer into its output parameter).
  UseNewConstVal = 1,
  // Replace the call with the non-constant (LHS) operand.
  UseOtherVal = 2,
  // For undef/poison, we can choose to either propagate undef/poison or
  // use the LHS value depending on what will allow more optimization.
  UseEither = 3
};
// Determine how a float min/max intrinsic simplifies when one operand is a
// single constant (undef/poison or a scalar ConstantFP). The result says
// whether to return the non-constant LHS operand, a replacement constant
// (written to *OutNewConstVal, with sNaNs quieted), or — for undef/poison —
// whichever of the two options enables more optimization.
static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst,
                                           const Intrinsic::ID IID,
                                           const CallBase *Call,
                                           Constant **OutNewConstVal) {
  assert(OutNewConstVal != nullptr);

  // Classify the intrinsic's NaN behavior and its direction.
  const bool PropagateNaN =
      IID == Intrinsic::minimum || IID == Intrinsic::maximum;
  const bool PropagateSNaN =
      IID == Intrinsic::minnum || IID == Intrinsic::maxnum;
  const bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
                     IID == Intrinsic::minimumnum;

  // min/max(x, poison) -> either x or poison
  if (isa<UndefValue>(RHSConst)) {
    *OutNewConstVal = const_cast<Constant *>(RHSConst);
    return MinMaxOptResult::UseEither;
  }

  const ConstantFP *CFP = dyn_cast<ConstantFP>(RHSConst);
  if (!CFP)
    return MinMaxOptResult::CannotOptimize;
  APFloat CAPF = CFP->getValueAPF();

  if (CAPF.isNaN()) {
    // minimum/maximum propagate any NaN; minnum/maxnum propagate only sNaNs
    // (quieted); minimumnum/maximumnum never propagate NaN:
    //   minnum(x, qnan) -> x          maxnum(x, qnan) -> x
    //   minnum(x, snan) -> qnan       maxnum(x, snan) -> qnan
    //   minimum(X, nan) -> qnan       maximum(X, nan) -> qnan
    //   minimumnum(X, nan) -> x       maximumnum(X, nan) -> x
    if (!PropagateNaN && !(PropagateSNaN && CAPF.isSignaling()))
      return MinMaxOptResult::UseOtherVal;
    *OutNewConstVal = ConstantFP::get(CFP->getType(), CAPF.makeQuiet());
    return MinMaxOptResult::UseNewConstVal;
  }

  // With the ninf flag set, the largest finite float acts like an infinity
  // in the folds below.
  const bool ActsLikeInf =
      CAPF.isInfinity() || (Call && Call->hasNoInfs() && CAPF.isLargest());
  if (!ActsLikeInf)
    return MinMaxOptResult::CannotOptimize;

  const bool NoNaNs = Call && Call->hasNoNaNs();
  if (CAPF.isNegative() == IsMin) {
    // The constant dominates the result:
    //   minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
    //   maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
    //   minimum(X, -inf) -> -inf if nnan
    //   maximum(X, +inf) -> +inf if nnan
    //   minimumnum(X, -inf) -> -inf
    //   maximumnum(X, +inf) -> +inf
    if (!PropagateNaN || NoNaNs) {
      *OutNewConstVal = const_cast<Constant *>(RHSConst);
      return MinMaxOptResult::UseNewConstVal;
    }
  } else {
    // The constant is neutral and the other operand wins:
    //   minnum(X, +inf) -> X if nnan
    //   maxnum(X, -inf) -> X if nnan
    //   minimum(X, +inf) -> X (ignoring quieting of sNaNs)
    //   maximum(X, -inf) -> X (ignoring quieting of sNaNs)
    //   minimumnum(X, +inf) -> X if nnan
    //   maximumnum(X, -inf) -> X if nnan
    if (PropagateNaN || NoNaNs)
      return MinMaxOptResult::UseOtherVal;
  }
  return MinMaxOptResult::CannotOptimize;
}

Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
Value *Op0, Value *Op1,
const SimplifyQuery &Q,
Expand Down Expand Up @@ -6780,49 +6857,73 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
case Intrinsic::maxnum:
case Intrinsic::minnum:
case Intrinsic::maximum:
case Intrinsic::minimum: {
// If the arguments are the same, this is a no-op.
case Intrinsic::minimum:
case Intrinsic::maximumnum:
case Intrinsic::minimumnum: {
// In several cases here, we deviate from exact IEEE 754 semantics
// to enable optimizations (as allowed by the LLVM IR spec).
//
// For instance, we may return one of the arguments unmodified instead of
// inserting an llvm.canonicalize to transform input sNaNs into qNaNs,
// or may assume all NaN inputs are qNaNs.

// If the arguments are the same, this is a no-op (ignoring NaN quieting)
if (Op0 == Op1)
return Op0;

// Canonicalize constant operand as Op1.
if (isa<Constant>(Op0))
std::swap(Op0, Op1);

// If an argument is undef, return the other argument.
if (Q.isUndefValue(Op1))
return Op0;
if (Constant *C = dyn_cast<Constant>(Op1)) {
MinMaxOptResult OptResult = MinMaxOptResult::CannotOptimize;
Constant *NewConst = nullptr;

if (VectorType *VTy = dyn_cast<VectorType>(C->getType())) {
ElementCount ElemCount = VTy->getElementCount();

if (Constant *SplatVal = C->getSplatValue()) {
// Handle splat vectors (including scalable vectors)
OptResult = OptimizeConstMinMax(SplatVal, IID, Call, &NewConst);
if (OptResult == MinMaxOptResult::UseNewConstVal)
NewConst = ConstantVector::getSplat(ElemCount, NewConst);

} else if (ElemCount.isFixed()) {
// Storage to build up new const return value (with NaNs quieted)
SmallVector<Constant *, 16> NewC(ElemCount.getFixedValue());

// Check elementwise whether we can optimize to either a constant
// value or return the LHS value. We cannot mix and match LHS +
// constant elements, as this would require inserting a new
// VectorShuffle instruction, which is not allowed in simplifyBinOp.
OptResult = MinMaxOptResult::UseEither;
for (unsigned i = 0; i != ElemCount.getFixedValue(); ++i) {
auto ElemResult = OptimizeConstMinMax(C->getAggregateElement(i),
IID, Call, &NewConst);
if (ElemResult == MinMaxOptResult::CannotOptimize ||
(ElemResult != OptResult &&
OptResult != MinMaxOptResult::UseEither &&
ElemResult != MinMaxOptResult::UseEither)) {
OptResult = MinMaxOptResult::CannotOptimize;
break;
}
NewC[i] = NewConst;
if (ElemResult != MinMaxOptResult::UseEither)
OptResult = ElemResult;
}
if (OptResult == MinMaxOptResult::UseNewConstVal)
NewConst = ConstantVector::get(NewC);
}
} else {
// Handle scalar inputs
OptResult = OptimizeConstMinMax(C, IID, Call, &NewConst);
}

bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum;

// minnum(X, nan) -> X
// maxnum(X, nan) -> X
// minimum(X, nan) -> nan
// maximum(X, nan) -> nan
if (match(Op1, m_NaN()))
return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0;

// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
const APFloat *C;
if (match(Op1, m_APFloat(C)) &&
(C->isInfinity() || (Call && Call->hasNoInfs() && C->isLargest()))) {
// minnum(X, -inf) -> -inf
// maxnum(X, +inf) -> +inf
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
if (C->isNegative() == IsMin &&
(!PropagateNaN || (Call && Call->hasNoNaNs())))
return ConstantFP::get(ReturnType, *C);

// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
// minimum(X, +inf) -> X
// maximum(X, -inf) -> X
if (C->isNegative() != IsMin &&
(PropagateNaN || (Call && Call->hasNoNaNs())))
return Op0;
if (OptResult == MinMaxOptResult::UseOtherVal ||
OptResult == MinMaxOptResult::UseEither)
return Op0; // Return the other arg (ignoring NaN quieting)
else if (OptResult == MinMaxOptResult::UseNewConstVal)
return NewConst;
}

// Min/max of the same operation with common operand:
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/ValueTracking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9095,6 +9095,10 @@ Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
case Intrinsic::minimum: return Intrinsic::maximum;
case Intrinsic::maxnum: return Intrinsic::minnum;
case Intrinsic::minnum: return Intrinsic::maxnum;
case Intrinsic::maximumnum:
return Intrinsic::minimumnum;
case Intrinsic::minimumnum:
return Intrinsic::maximumnum;
default: llvm_unreachable("Unexpected intrinsic");
}
}
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,10 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
ret void
}

; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.
; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN.

; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
Expand Down
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1949,8 +1949,7 @@ define float @v_fneg_self_minimumnum_f32_ieee(float %a) #0 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_max_f32_e32 v0, v0, v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
Expand All @@ -1961,7 +1960,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
Expand Down Expand Up @@ -2285,8 +2284,7 @@ define float @v_fneg_self_maximumnum_f32_ieee(float %a) #0 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: v_min_f32_e32 v0, v0, v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max
Expand All @@ -2297,7 +2295,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max
Expand Down
39 changes: 11 additions & 28 deletions llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -269,42 +269,27 @@ define float @fmed3_constant_src2_1_f32(float %x, float %y) #1 {
}

define float @fmed3_x_qnan0_qnan1_f32(float %x) #1 {
; IEEE1-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE1-NEXT: ret float [[X]]
;
; IEEE0-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
; IEEE0-NEXT: ret float [[MED3]]
; CHECK-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: ret float [[X]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
ret float %med3
}

define float @fmed3_qnan0_x_qnan1_f32(float %x) #1 {
; IEEE1-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE1-NEXT: ret float [[X]]
;
; IEEE0-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
; IEEE0-NEXT: ret float [[MED3]]
; CHECK-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: ret float [[X]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
ret float %med3
}

define float @fmed3_qnan0_qnan1_x_f32(float %x) #1 {
; IEEE1-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE1-NEXT: ret float [[X]]
;
; IEEE0-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
; IEEE0-NEXT: ret float [[MED3]]
; CHECK-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: ret float [[X]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
ret float %med3
Expand Down Expand Up @@ -448,8 +433,7 @@ define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
;
; IEEE0-LABEL: define float @fmed3_snan1_x_snan2_f32(
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
; IEEE0-NEXT: ret float [[MED3]]
; IEEE0-NEXT: ret float [[X]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000)
ret float %med3
Expand All @@ -462,8 +446,7 @@ define float @fmed3_x_snan1_snan2_f32(float %x) #1 {
;
; IEEE0-LABEL: define float @fmed3_x_snan1_snan2_f32(
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
; IEEE0-NEXT: ret float [[MED3]]
; IEEE0-NEXT: ret float [[X]]
;
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000)
ret float %med3
Expand Down
Loading