diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 07f4a8e5c889e..d8ee0007bb3e2 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6469,7 +6469,8 @@ static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) { static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) { assert((IID == Intrinsic::maxnum || IID == Intrinsic::minnum || - IID == Intrinsic::maximum || IID == Intrinsic::minimum) && + IID == Intrinsic::maximum || IID == Intrinsic::minimum || + IID == Intrinsic::maximumnum || IID == Intrinsic::minimumnum) && "Unsupported intrinsic"); auto *M0 = dyn_cast(Op0); @@ -6508,6 +6509,82 @@ static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0, return nullptr; } +enum class MinMaxOptResult { + CannotOptimize = 0, + UseNewConstVal = 1, + UseOtherVal = 2, + // For undef/poison, we can choose to either propagate undef/poison or + // use the LHS value depending on what will allow more optimization. + UseEither = 3 +}; +// Get the optimized value for a min/max instruction with a single constant +// input (either undef or scalar constantFP). The result may indicate to +// use the non-const LHS value, use a new constant value instead (with NaNs +// quieted), or to choose either option in the case of undef/poison. 
+static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst, + const Intrinsic::ID IID, + const CallBase *Call, + Constant **OutNewConstVal) { + assert(OutNewConstVal != nullptr); + + bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum; + bool PropagateSNaN = IID == Intrinsic::minnum || IID == Intrinsic::maxnum; + bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum || + IID == Intrinsic::minimumnum; + + // min/max(x, poison) -> either x or poison + if (isa(RHSConst)) { + *OutNewConstVal = const_cast(RHSConst); + return MinMaxOptResult::UseEither; + } + + const ConstantFP *CFP = dyn_cast(RHSConst); + if (!CFP) + return MinMaxOptResult::CannotOptimize; + APFloat CAPF = CFP->getValueAPF(); + + // minnum(x, qnan) -> x + // maxnum(x, qnan) -> x + // minnum(x, snan) -> qnan + // maxnum(x, snan) -> qnan + // minimum(X, nan) -> qnan + // maximum(X, nan) -> qnan + // minimumnum(X, nan) -> x + // maximumnum(X, nan) -> x + if (CAPF.isNaN()) { + if (PropagateNaN || (PropagateSNaN && CAPF.isSignaling())) { + *OutNewConstVal = ConstantFP::get(CFP->getType(), CAPF.makeQuiet()); + return MinMaxOptResult::UseNewConstVal; + } + return MinMaxOptResult::UseOtherVal; + } + + if (CAPF.isInfinity() || (Call && Call->hasNoInfs() && CAPF.isLargest())) { + // minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation) + // maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation) + // minimum(X, -inf) -> -inf if nnan + // maximum(X, +inf) -> +inf if nnan + // minimumnum(X, -inf) -> -inf + // maximumnum(X, +inf) -> +inf + if (CAPF.isNegative() == IsMin && + (!PropagateNaN || (Call && Call->hasNoNaNs()))) { + *OutNewConstVal = const_cast(RHSConst); + return MinMaxOptResult::UseNewConstVal; + } + + // minnum(X, +inf) -> X if nnan + // maxnum(X, -inf) -> X if nnan + // minimum(X, +inf) -> X (ignoring quieting of sNaNs) + // maximum(X, -inf) -> X (ignoring quieting of sNaNs) + // minimumnum(X, +inf) -> X if nnan + // maximumnum(X, -inf) -> X if 
nnan + if (CAPF.isNegative() != IsMin && + (PropagateNaN || (Call && Call->hasNoNaNs()))) + return MinMaxOptResult::UseOtherVal; + } + return MinMaxOptResult::CannotOptimize; +} + Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -6776,8 +6853,17 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::maximum: - case Intrinsic::minimum: { - // If the arguments are the same, this is a no-op. + case Intrinsic::minimum: + case Intrinsic::maximumnum: + case Intrinsic::minimumnum: { + // In several cases here, we deviate from exact IEEE 754 semantics + // to enable optimizations (as allowed by the LLVM IR spec). + // + // For instance, we may return one of the arguments unmodified instead of + // inserting an llvm.canonicalize to transform input sNaNs into qNaNs, + // or may assume all NaN inputs are qNaNs. + + // If the arguments are the same, this is a no-op (ignoring NaN quieting) if (Op0 == Op1) return Op0; @@ -6785,40 +6871,55 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType, if (isa(Op0)) std::swap(Op0, Op1); - // If an argument is undef, return the other argument. 
- if (Q.isUndefValue(Op1)) - return Op0; + if (Constant *C = dyn_cast(Op1)) { + MinMaxOptResult OptResult = MinMaxOptResult::CannotOptimize; + Constant *NewConst = nullptr; + + if (VectorType *VTy = dyn_cast(C->getType())) { + ElementCount ElemCount = VTy->getElementCount(); + + if (Constant *SplatVal = C->getSplatValue()) { + // Handle splat vectors (including scalable vectors) + OptResult = OptimizeConstMinMax(SplatVal, IID, Call, &NewConst); + if (OptResult == MinMaxOptResult::UseNewConstVal) + NewConst = ConstantVector::getSplat(ElemCount, NewConst); + + } else if (ElemCount.isFixed()) { + // Storage to build up new const return value (with NaNs quieted) + SmallVector NewC(ElemCount.getFixedValue()); + + // Check elementwise whether we can optimize to either a constant + // value or return the LHS value. We cannot mix and match LHS + + // constant elements, as this would require inserting a new + // VectorShuffle instruction, which is not allowed in simplifyBinOp. + OptResult = MinMaxOptResult::UseEither; + for (unsigned i = 0; i != ElemCount.getFixedValue(); ++i) { + auto ElemResult = OptimizeConstMinMax(C->getAggregateElement(i), + IID, Call, &NewConst); + if (ElemResult == MinMaxOptResult::CannotOptimize || + (ElemResult != OptResult && + OptResult != MinMaxOptResult::UseEither && + ElemResult != MinMaxOptResult::UseEither)) { + OptResult = MinMaxOptResult::CannotOptimize; + break; + } + NewC[i] = NewConst; + if (ElemResult != MinMaxOptResult::UseEither) + OptResult = ElemResult; + } + if (OptResult == MinMaxOptResult::UseNewConstVal) + NewConst = ConstantVector::get(NewC); + } + } else { + // Handle scalar inputs + OptResult = OptimizeConstMinMax(C, IID, Call, &NewConst); + } - bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum; - bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum; - - // minnum(X, nan) -> X - // maxnum(X, nan) -> X - // minimum(X, nan) -> nan - // maximum(X, nan) -> nan - if (match(Op1, m_NaN())) - 
return PropagateNaN ? propagateNaN(cast(Op1)) : Op0; - - // In the following folds, inf can be replaced with the largest finite - // float, if the ninf flag is set. - const APFloat *C; - if (match(Op1, m_APFloat(C)) && - (C->isInfinity() || (Call && Call->hasNoInfs() && C->isLargest()))) { - // minnum(X, -inf) -> -inf - // maxnum(X, +inf) -> +inf - // minimum(X, -inf) -> -inf if nnan - // maximum(X, +inf) -> +inf if nnan - if (C->isNegative() == IsMin && - (!PropagateNaN || (Call && Call->hasNoNaNs()))) - return ConstantFP::get(ReturnType, *C); - - // minnum(X, +inf) -> X if nnan - // maxnum(X, -inf) -> X if nnan - // minimum(X, +inf) -> X - // maximum(X, -inf) -> X - if (C->isNegative() != IsMin && - (PropagateNaN || (Call && Call->hasNoNaNs()))) - return Op0; + if (OptResult == MinMaxOptResult::UseOtherVal || + OptResult == MinMaxOptResult::UseEither) + return Op0; // Return the other arg (ignoring NaN quieting) + else if (OptResult == MinMaxOptResult::UseNewConstVal) + return NewConst; } // Min/max of the same operation with common operand: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 6f11b250cf21f..bf87d9bbb0294 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -9067,6 +9067,10 @@ Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) { case Intrinsic::minimum: return Intrinsic::maximum; case Intrinsic::maxnum: return Intrinsic::minnum; case Intrinsic::minnum: return Intrinsic::maxnum; + case Intrinsic::maximumnum: + return Intrinsic::minimumnum; + case Intrinsic::minimumnum: + return Intrinsic::maximumnum; default: llvm_unreachable("Unexpected intrinsic"); } } diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll index ab51693198a30..05d3e9c381910 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -497,12 
+497,10 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace ret void } -; FIXME: Should there be more checks here? minnum with NaN operand is simplified away. +; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN. ; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32: -; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]] -; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]] -; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]] +; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000 define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll index e687745469014..017f24d47e1cb 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll @@ -2055,8 +2055,7 @@ define float @v_fneg_self_minimumnum_f32_ieee(float %a) #0 { ; GCN-LABEL: v_fneg_self_minimumnum_f32_ieee: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 -; GCN-NEXT: v_max_f32_e32 v0, v0, v0 +; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %min = call float @llvm.minimumnum.f32(float %a, float %a) %min.fneg = fneg float %min @@ -2067,7 +2066,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 { ; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %min = call float @llvm.minimumnum.f32(float %a, float %a) %min.fneg = fneg float %min @@ -2391,8 +2390,7 @@ define float @v_fneg_self_maximumnum_f32_ieee(float %a) #0 { ; GCN-LABEL: v_fneg_self_maximumnum_f32_ieee: ; GCN: ; %bb.0: ; GCN-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 -; GCN-NEXT: v_min_f32_e32 v0, v0, v0 +; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %max = call float @llvm.maximumnum.f32(float %a, float %a) %max.fneg = fneg float %max @@ -2403,7 +2401,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 { ; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0 +; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] %max = call float @llvm.maximumnum.f32(float %a, float %a) %max.fneg = fneg float %max diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll index 361a2b8280910..378ca1fa44c17 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll @@ -269,42 +269,27 @@ define float @fmed3_constant_src2_1_f32(float %x, float %y) #1 { } define float @fmed3_x_qnan0_qnan1_f32(float %x) #1 { -; IEEE1-LABEL: define float @fmed3_x_qnan0_qnan1_f32( -; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE1-NEXT: ret float [[X]] -; -; IEEE0-LABEL: define float @fmed3_x_qnan0_qnan1_f32( -; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000) -; IEEE0-NEXT: ret float [[MED3]] +; CHECK-LABEL: define float @fmed3_x_qnan0_qnan1_f32( +; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret float [[X]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000) ret float %med3 } define float @fmed3_qnan0_x_qnan1_f32(float %x) #1 { -; IEEE1-LABEL: define float @fmed3_qnan0_x_qnan1_f32( -; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE1-NEXT: ret float [[X]] -; -; IEEE0-LABEL: define float @fmed3_qnan0_x_qnan1_f32( -; IEEE0-SAME: float [[X:%.*]]) 
#[[ATTR1]] { -; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000) -; IEEE0-NEXT: ret float [[MED3]] +; CHECK-LABEL: define float @fmed3_qnan0_x_qnan1_f32( +; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret float [[X]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000) ret float %med3 } define float @fmed3_qnan0_qnan1_x_f32(float %x) #1 { -; IEEE1-LABEL: define float @fmed3_qnan0_qnan1_x_f32( -; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE1-NEXT: ret float [[X]] -; -; IEEE0-LABEL: define float @fmed3_qnan0_qnan1_x_f32( -; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000) -; IEEE0-NEXT: ret float [[MED3]] +; CHECK-LABEL: define float @fmed3_qnan0_qnan1_x_f32( +; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: ret float [[X]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x) ret float %med3 @@ -448,8 +433,7 @@ define float @fmed3_snan1_x_snan2_f32(float %x) #1 { ; ; IEEE0-LABEL: define float @fmed3_snan1_x_snan2_f32( ; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000) -; IEEE0-NEXT: ret float [[MED3]] +; IEEE0-NEXT: ret float [[X]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000) ret float %med3 @@ -462,8 +446,7 @@ define float @fmed3_x_snan1_snan2_f32(float %x) #1 { ; ; IEEE0-LABEL: define float @fmed3_x_snan1_snan2_f32( ; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000) -; IEEE0-NEXT: ret float [[MED3]] +; IEEE0-NEXT: ret float [[X]] ; %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000) ret float %med3 
diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index 26b51146057e9..3a03f8627ab68 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -6,12 +6,12 @@ ;############################################################### ; minnum(X, qnan) -> X ; maxnum(X, qnan) -> X -; TODO: minnum(X, snan) -> qnan (currently we treat SNaN the same as QNaN) -; TODO: maxnum(X, snan) -> qnan (currently we treat SNaN the same as QNaN) +; minnum(X, snan) -> qnan +; maxnum(X, snan) -> qnan ; minimum(X, nan) -> qnan ; maximum(X, nan) -> qnan -; TODO: minimumnum(X, nan) -> X -; TODO: maximumnum(X, nan) -> X +; minimumnum(X, nan) -> X +; maximumnum(X, nan) -> X define void @minmax_qnan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_qnan_f32( @@ -19,10 +19,8 @@ define void @minmax_qnan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %mi ; CHECK-NEXT: store float [[X]], ptr [[MAXNUM_RES:%.*]], align 4 ; CHECK-NEXT: store float 0x7FFF000000000000, ptr [[MINIMUM_RES:%.*]], align 4 ; CHECK-NEXT: store float 0x7FFF000000000000, ptr [[MAXIMUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FFF000000000000) -; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0x7FFF000000000000) -; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void ; %minnum = call float @llvm.minnum.f32(float %x, float 0x7FFF000000000000) @@ -42,17 +40,15 @@ define void @minmax_qnan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr 
%mi ret void } -; TODO currently snan is treated the same as qnan, but maxnum/minnum should really return qnan for these cases, not X +; Note that maxnum/minnum return qnan here for snan inputs, unlike maximumnum/minimumnum define void @minmax_snan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_snan_f32( -; CHECK-NEXT: store float [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 4 -; CHECK-NEXT: store float [[X]], ptr [[MAXNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float 0x7FFC000000000000, ptr [[MINNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float 0x7FFC000000000000, ptr [[MAXNUM_RES:%.*]], align 4 ; CHECK-NEXT: store float 0x7FFC000000000000, ptr [[MINIMUM_RES:%.*]], align 4 ; CHECK-NEXT: store float 0x7FFC000000000000, ptr [[MAXIMUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF4000000000000) -; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0x7FF4000000000000) -; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float [[X:%.*]], ptr [[MINIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void ; %minnum = call float @llvm.minnum.f32(float %x, float 0x7FF4000000000000) @@ -78,10 +74,8 @@ define void @minmax_qnan_nxv2f64_op0( %x, ptr %minnum_res, ; CHECK-NEXT: store [[X]], ptr [[MAXNUM_RES:%.*]], align 16 ; CHECK-NEXT: store splat (double 0x7FF8000DEAD00000), ptr [[MINIMUM_RES:%.*]], align 16 ; CHECK-NEXT: store splat (double 0x7FF8000DEAD00000), ptr [[MAXIMUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call @llvm.minimumnum.nxv2f64( splat (double 0x7FF8000DEAD00000), [[X]]) -; CHECK-NEXT: store [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16 -; CHECK-NEXT: 
[[MAXIMUMNUM:%.*]] = call @llvm.maximumnum.nxv2f64( splat (double 0x7FF8000DEAD00000), [[X]]) -; CHECK-NEXT: store [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 ; CHECK-NEXT: ret void ; %minnum = call @llvm.minnum.nxv2f64( splat (double 0x7FF8000DEAD00000), %x) @@ -101,17 +95,15 @@ define void @minmax_qnan_nxv2f64_op0( %x, ptr %minnum_res, ret void } -; TODO currently snan is treated the same as qnan, but maxnum/minnum should really return qnan for these cases, not X +; Note that maxnum/minnum return qnan here for snan inputs, unlike maximumnum/minimumnum define void @minmax_snan_nxv2f64_op1( %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_snan_nxv2f64_op1( -; CHECK-NEXT: store [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 16 -; CHECK-NEXT: store [[X]], ptr [[MAXNUM_RES:%.*]], align 16 +; CHECK-NEXT: store splat (double 0x7FFC00DEAD00DEAD), ptr [[MINNUM_RES:%.*]], align 16 +; CHECK-NEXT: store splat (double 0x7FFC00DEAD00DEAD), ptr [[MAXNUM_RES:%.*]], align 16 ; CHECK-NEXT: store splat (double 0x7FFC00DEAD00DEAD), ptr [[MINIMUM_RES:%.*]], align 16 ; CHECK-NEXT: store splat (double 0x7FFC00DEAD00DEAD), ptr [[MAXIMUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call @llvm.minimumnum.nxv2f64( splat (double 0x7FF400DEAD00DEAD), [[X]]) -; CHECK-NEXT: store [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call @llvm.maximumnum.nxv2f64( splat (double 0x7FF400DEAD00DEAD), [[X]]) -; CHECK-NEXT: store [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store [[X:%.*]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 ; CHECK-NEXT: ret void ; %minnum = call @llvm.minnum.nxv2f64( splat (double 0x7FF400DEAD00DEAD), %x) @@ -131,17 +123,18 
@@ define void @minmax_snan_nxv2f64_op1( %x, ptr %minnum_res, ret void } -; TODO Currently, we treat SNaN and QNaN the same. However, for maxnum and minnum, we should not optimize this, as we should return <%x0, QNaN> instead of <%x0, %x1> +; For maxnum and minnum, we cannot optimize this in InstSimplify, as the result should +; return <%x0, QNaN> and InstSimplify cannot create the extra instructions required to construct this. define void @minmax_mixed_snan_qnan_v2f64(<2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_mixed_snan_qnan_v2f64( -; CHECK-NEXT: store <2 x double> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 16 -; CHECK-NEXT: store <2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16 +; CHECK-NEXT: [[MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> [[X:%.*]]) +; CHECK-NEXT: store <2 x double> [[MINNUM]], ptr [[MINNUM_RES:%.*]], align 16 +; CHECK-NEXT: [[MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> , <2 x double> [[X]]) +; CHECK-NEXT: store <2 x double> [[MAXNUM]], ptr [[MAXNUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x double> , ptr [[MINIMUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x double> , ptr [[MAXIMUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> , <2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> , <2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 ; CHECK-NEXT: ret void ; %minnum = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) @@ -169,10 +162,8 @@ define 
void @minmax_mixed_qnan_poison_v2f64(<2 x double> %x, ptr %minnum_res, pt ; CHECK-NEXT: store <2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x double> , ptr [[MINIMUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x double> , ptr [[MAXIMUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> , <2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> , <2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 ; CHECK-NEXT: ret void ; %minnum = call <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) @@ -201,10 +192,8 @@ define void @minmax_poison_op0_f16(half %x, ptr %minnum_res, ptr %maxnum_res, pt ; CHECK-NEXT: store half [[X]], ptr [[MAXNUM_RES:%.*]], align 2 ; CHECK-NEXT: store half [[X]], ptr [[MINIMUM_RES:%.*]], align 2 ; CHECK-NEXT: store half [[X]], ptr [[MAXIMUM_RES:%.*]], align 2 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call half @llvm.minimumnum.f16(half poison, half [[X]]) -; CHECK-NEXT: store half [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 2 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call half @llvm.maximumnum.f16(half poison, half [[X]]) -; CHECK-NEXT: store half [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 2 +; CHECK-NEXT: store half [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 2 +; CHECK-NEXT: store half [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 2 ; CHECK-NEXT: ret void ; %minnum = call half @llvm.minnum.f16(half poison, half %x) @@ -230,10 +219,8 @@ define void @minmax_poison_op1_nxv2f64( %x, ptr %minnum_res ; CHECK-NEXT: store [[X]], ptr [[MAXNUM_RES:%.*]], align 16 ; CHECK-NEXT: store [[X]], ptr [[MINIMUM_RES:%.*]], align 
16 ; CHECK-NEXT: store [[X]], ptr [[MAXIMUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call nnan @llvm.minimumnum.nxv2f64( [[X]], poison) -; CHECK-NEXT: store [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call nnan @llvm.maximumnum.nxv2f64( [[X]], poison) -; CHECK-NEXT: store [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 ; CHECK-NEXT: ret void ; %minnum = call nnan @llvm.minnum.nxv2f64( %x, poison) @@ -260,10 +247,10 @@ define void @minmax_poison_op1_nxv2f64( %x, ptr %minnum_res ; minnum(X, +inf) -> X if nnan (ignoring NaN quieting) ; maximum(X, +inf) -> +inf if nnan ; minimum(X, +inf) -> X (ignoring NaN quieting) -; TODO: maximumnum(X, +inf) -> +inf -; TODO: minimumnum(X, +inf) -> X if nnan (ignoring NaN quieting) +; maximumnum(X, +inf) -> +inf +; minimumnum(X, +inf) -> X if nnan (ignoring NaN quieting) -; Can only optimize maxnum and minimum without the nnan flag +; Can only optimize maxnum, minimum, and maximumnum without the nnan flag define void @minmax_pos_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_pos_inf_f32( ; CHECK-NEXT: [[MINNUM:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0x7FF0000000000000) @@ -274,8 +261,7 @@ define void @minmax_pos_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr ; CHECK-NEXT: store float [[MAXIMUM]], ptr [[MAXIMUM_RES:%.*]], align 4 ; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000000000000) ; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0x7FF0000000000000) -; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: 
store float 0x7FF0000000000000, ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void ; %minnum = call float @llvm.minnum.f32(float %x, float 0x7FF0000000000000) @@ -296,17 +282,14 @@ define void @minmax_pos_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr } ; Can optimize all minmax variants if the nnan flag is set -; TODO maximumnum/minimumnum define void @minmax_pos_inf_nnan_v2f32(<2 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_pos_inf_nnan_v2f32( ; CHECK-NEXT: store <2 x float> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 8 ; CHECK-NEXT: store <2 x float> splat (float 0x7FF0000000000000), ptr [[MAXNUM_RES:%.*]], align 8 ; CHECK-NEXT: store <2 x float> [[X]], ptr [[MINIMUM_RES:%.*]], align 8 ; CHECK-NEXT: store <2 x float> splat (float 0x7FF0000000000000), ptr [[MAXIMUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call nnan <2 x float> @llvm.minimumnum.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call nnan <2 x float> @llvm.maximumnum.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store <2 x float> splat (float 0x7FF0000000000000), ptr [[MAXIMUMNUM_RES:%.*]], align 8 ; CHECK-NEXT: ret void ; %minnum = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> %x) @@ -333,10 +316,10 @@ define void @minmax_pos_inf_nnan_v2f32(<2 x float> %x, ptr %minnum_res, ptr %max ; maxnum(X, -inf) -> X if nnan ; minimum(X, -inf) -> -inf if nnan ; maximum(X, -inf) -> X (Ignoring NaN quieting) -; TODO: minimumnum(X, -inf) -> -inf -; TODO: maximumnum(X, -inf) -> X if nnan +; 
minimumnum(X, -inf) -> -inf +; maximumnum(X, -inf) -> X if nnan -; Can only optimize minnum and maximum without the nnan flag +; Can only optimize minnum, maximum, and minimumnum without the nnan flag define void @minmax_neg_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_neg_inf_f32( ; CHECK-NEXT: store float 0xFFF0000000000000, ptr [[MINNUM_RES:%.*]], align 4 @@ -345,8 +328,7 @@ define void @minmax_neg_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr ; CHECK-NEXT: [[MINIMUM:%.*]] = call float @llvm.minimum.f32(float [[X]], float 0xFFF0000000000000) ; CHECK-NEXT: store float [[MINIMUM]], ptr [[MINIMUM_RES:%.*]], align 4 ; CHECK-NEXT: store float [[X]], ptr [[MAXIMUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0xFFF0000000000000) -; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float 0xFFF0000000000000, ptr [[MINIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0xFFF0000000000000) ; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void @@ -369,17 +351,14 @@ define void @minmax_neg_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr } ; Can optimize all minmax variants if the nnan flag is set -; TODO maximumnum/minimumnum define void @minmax_neg_inf_nnan_v2f64(<2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_neg_inf_nnan_v2f64( ; CHECK-NEXT: store <2 x double> splat (double 0xFFF0000000000000), ptr [[MINNUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x double> [[X:%.*]], ptr [[MAXNUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x double> splat (double 0xFFF0000000000000), ptr [[MINIMUM_RES:%.*]], align 16 ; CHECK-NEXT: store <2 x 
double> [[X]], ptr [[MAXIMUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[X]], <2 x double> splat (double 0xFFF0000000000000)) -; CHECK-NEXT: store <2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[X]], <2 x double> splat (double 0xFFF0000000000000)) -; CHECK-NEXT: store <2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> splat (double 0xFFF0000000000000), ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 ; CHECK-NEXT: ret void ; %minnum = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> splat (double 0xFFF0000000000000)) @@ -406,8 +385,8 @@ define void @minmax_neg_inf_nnan_v2f64(<2 x double> %x, ptr %minnum_res, ptr %ma ; minnum(X, +largest) -> X if ninf && nnan ; maximum(X, +largest) -> +largest if ninf && nnan ; minimum(X, +largest) -> X if ninf (ignoring quieting of sNaNs) -; TODO: maximumnum(X, +largest) -> +largest if ninf && nnan -; TODO: minimumnum(X, +largest) -> X if ninf && nnan +; maximumnum(X, +largest) -> +largest if ninf +; minimumnum(X, +largest) -> X if ninf && nnan ; None of these should be optimized away without the nnan/ninf flags define void @minmax_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { @@ -443,7 +422,7 @@ define void @minmax_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr ret void } -; We can optimize maxnum and minimum if we know ninf is set +; We can optimize maxnum, minimum, and maximumnum if we know ninf is set define void @minmax_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_largest_f32_ninf( ; 
CHECK-NEXT: [[MINNUM:%.*]] = call ninf float @llvm.minnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000) @@ -454,8 +433,7 @@ define void @minmax_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res, ; CHECK-NEXT: store float [[MAXIMUM]], ptr [[MAXIMUM_RES:%.*]], align 4 ; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call ninf float @llvm.minimumnum.f32(float [[X]], float 0x47EFFFFFE0000000) ; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call ninf float @llvm.maximumnum.f32(float [[X]], float 0x47EFFFFFE0000000) -; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float 0x47EFFFFFE0000000, ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void ; %minnum = call ninf float @llvm.minnum.f32(float %x, float 0x47EFFFFFE0000000) @@ -476,17 +454,14 @@ define void @minmax_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res, } ; All can be optimized if both the ninf and nnan flags are set (ignoring SNaN propagation in minnum/maxnum) -; TODO maximumnum/minimumnum define void @minmax_largest_v2f32_ninf_nnan(<2 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_largest_v2f32_ninf_nnan( ; CHECK-NEXT: store <2 x float> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 8 ; CHECK-NEXT: store <2 x float> splat (float 0x47EFFFFFE0000000), ptr [[MAXNUM_RES:%.*]], align 8 ; CHECK-NEXT: store <2 x float> [[X]], ptr [[MINIMUM_RES:%.*]], align 8 ; CHECK-NEXT: store <2 x float> splat (float 0x47EFFFFFE0000000), ptr [[MAXIMUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call nnan ninf <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[X]], <2 x float> splat (float 0x47EFFFFFE0000000)) -; CHECK-NEXT: store <2 x float> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call nnan ninf <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[X]], <2 x 
float> splat (float 0x47EFFFFFE0000000)) -; CHECK-NEXT: store <2 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store <2 x float> splat (float 0x47EFFFFFE0000000), ptr [[MAXIMUMNUM_RES:%.*]], align 8 ; CHECK-NEXT: ret void ; %minnum = call ninf nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> splat (float 0x47EFFFFFE0000000)) @@ -513,8 +488,8 @@ define void @minmax_largest_v2f32_ninf_nnan(<2 x float> %x, ptr %minnum_res, ptr ; minnum(X, -largest) -> -largest if ninf (ignoring SNaN -> QNaN propagation) ; maximum(X, -largest) -> X if ninf (ignoring quieting of sNaNs) ; minimum(X, -largest) -> -largest if ninf && nnan -; TODO: maximumnum(X, -largest) -> X if ninf && nnan -; TODO: minimumnum(X, -largest) -> -largest if ninf +; maximumnum(X, -largest) -> X if ninf && nnan +; minimumnum(X, -largest) -> -largest if ninf ; None of these should be optimized away without the nnan/ninf flags define void @minmax_neg_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { @@ -550,7 +525,7 @@ define void @minmax_neg_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ret void } -; We can optimize minnum and maximum if we know ninf is set +; We can optimize minnum, maximum, and minimumnum if we know ninf is set define void @minmax_neg_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_neg_largest_f32_ninf( ; CHECK-NEXT: store float 0xC7EFFFFFE0000000, ptr [[MINNUM_RES:%.*]], align 4 @@ -559,8 +534,7 @@ define void @minmax_neg_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_ ; CHECK-NEXT: [[MINIMUM:%.*]] = call ninf float @llvm.minimum.f32(float [[X]], float 0xC7EFFFFFE0000000) ; CHECK-NEXT: store float [[MINIMUM]], ptr [[MINIMUM_RES:%.*]], align 4 ; CHECK-NEXT: 
store float [[X]], ptr [[MAXIMUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call ninf float @llvm.minimumnum.f32(float [[X]], float 0xC7EFFFFFE0000000) -; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float 0xC7EFFFFFE0000000, ptr [[MINIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call ninf float @llvm.maximumnum.f32(float [[X]], float 0xC7EFFFFFE0000000) ; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void @@ -583,17 +557,14 @@ define void @minmax_neg_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_ } ; All can be optimized if both the ninf and nnan flags are set (ignoring SNaN propagation in minnum/maxnum) -; TODO maximumnum/minimumnum define void @minmax_neg_largest_nxv2f32_nnan_ninf( %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { ; CHECK-LABEL: @minmax_neg_largest_nxv2f32_nnan_ninf( ; CHECK-NEXT: store splat (float 0xC7EFFFFFE0000000), ptr [[MINNUM_RES:%.*]], align 8 ; CHECK-NEXT: store [[X:%.*]], ptr [[MAXNUM_RES:%.*]], align 8 ; CHECK-NEXT: store splat (float 0xC7EFFFFFE0000000), ptr [[MINIMUM_RES:%.*]], align 8 ; CHECK-NEXT: store [[X]], ptr [[MAXIMUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call nnan ninf @llvm.minimumnum.nxv2f32( [[X]], splat (float 0xC7EFFFFFE0000000)) -; CHECK-NEXT: store [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call nnan ninf @llvm.maximumnum.nxv2f32( [[X]], splat (float 0xC7EFFFFFE0000000)) -; CHECK-NEXT: store [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store splat (float 0xC7EFFFFFE0000000), ptr [[MINIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 ; CHECK-NEXT: ret void ; %minnum = call nnan ninf @llvm.minnum.nxv2f32( %x, splat (float 0xC7EFFFFFE0000000)) @@ -613,6 +584,80 @@ define void 
@minmax_neg_largest_nxv2f32_nnan_ninf( %x, ptr % ret void } +;############################################################### +;# Mixed Constant Vector Elements # +;############################################################### +; Tests elementwise handling of different combinations of the above optimizable constants + +; Test with vector variants (v2f64) with +Inf and poison +; Poison element allows for flexibility to choose either X or the constant vector where applicable +define void @minmax_mixed_pos_inf_poison_v2f64_nnan(<2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { +; CHECK-LABEL: @minmax_mixed_pos_inf_poison_v2f64_nnan( +; CHECK-NEXT: store <2 x double> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> , ptr [[MAXNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MINIMUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> , ptr [[MAXIMUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <2 x double> , ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: ret void +; + %minnum = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> , <2 x double> %x) + store <2 x double> %minnum, ptr %minnum_res + %maxnum = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> , <2 x double> %x) + store <2 x double> %maxnum, ptr %maxnum_res + + %minimum = call nnan <2 x double> @llvm.minimum.v2f64(<2 x double> , <2 x double> %x) + store <2 x double> %minimum, ptr %minimum_res + %maximum = call nnan <2 x double> @llvm.maximum.v2f64(<2 x double> , <2 x double> %x) + store <2 x double> %maximum, ptr %maximum_res + + %minimumnum = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> , <2 x double> %x) + store <2 x double> %minimumnum, ptr %minimumnum_res + %maximumnum = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> , <2 x double> %x) + store <2 x double> %maximumnum, ptr
%maximumnum_res + ret void +} + +; Tests to show that we can optimize different classes of constant (inf/nan/poison) in different vector elements. +; We can only optimize if the result would be choosing all elements of the input X, or all constant elements though +; (where poison allows us to choose either). +; +; nnan minnum(, X) = (Cannot mix elements from X and constant vector) +; nnan maxnum(, X) = +; nnan minimum(, X) = (Cannot mix elements from X and constant vector) +; nnan maximum(, X) = +; nnan minimumnum(, X) = (Poison can be either X or constant value) +; nnan maximumnum(, X) = +define void @minmax_mixed_pos_inf_poison_snan_v3f32(<3 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) { +; CHECK-LABEL: @minmax_mixed_pos_inf_poison_snan_v3f32( +; CHECK-NEXT: [[MINNUM:%.*]] = call nnan <3 x float> @llvm.minnum.v3f32(<3 x float> , <3 x float> [[X:%.*]]) +; CHECK-NEXT: store <3 x float> [[MINNUM]], ptr [[MINNUM_RES:%.*]], align 16 +; CHECK-NEXT: store <3 x float> , ptr [[MAXNUM_RES:%.*]], align 16 +; CHECK-NEXT: [[MINIMUM:%.*]] = call nnan <3 x float> @llvm.minimum.v3f32(<3 x float> , <3 x float> [[X]]) +; CHECK-NEXT: store <3 x float> [[MINIMUM]], ptr [[MINIMUM_RES:%.*]], align 16 +; CHECK-NEXT: store <3 x float> , ptr [[MAXIMUM_RES:%.*]], align 16 +; CHECK-NEXT: store <3 x float> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> , <3 x float> [[X]]) +; CHECK-NEXT: store <3 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16 +; CHECK-NEXT: ret void +; + %minnum = call nnan <3 x float> @llvm.minnum.v3f32(<3 x float> , <3 x float> %x) + store <3 x float> %minnum, ptr %minnum_res + %maxnum = call nnan <3 x float> @llvm.maxnum.v3f32(<3 x float> , <3 x float> %x) + store <3 x float> %maxnum, ptr %maxnum_res + + %minimum = call nnan <3 x float> @llvm.minimum.v3f32(<3 x float> , <3 x float> %x) +
store <3 x float> %minimum, ptr %minimum_res + %maximum = call nnan <3 x float> @llvm.maximum.v3f32(<3 x float> , <3 x float> %x) + store <3 x float> %maximum, ptr %maximum_res + + %minimumnum = call nnan <3 x float> @llvm.minimumnum.v3f32(<3 x float> , <3 x float> %x) + store <3 x float> %minimumnum, ptr %minimumnum_res + %maximumnum = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> , <3 x float> %x) + store <3 x float> %maximumnum, ptr %maximumnum_res + ret void +} + ;############################################################### ;# Min(x, x) / Max(x, x) # ;############################################################### @@ -623,10 +668,8 @@ define void @minmax_same_args(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %m ; CHECK-NEXT: store float [[X]], ptr [[MAXNUM_RES:%.*]], align 4 ; CHECK-NEXT: store float [[X]], ptr [[MINIMUM_RES:%.*]], align 4 ; CHECK-NEXT: store float [[X]], ptr [[MAXIMUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[X]]) -; CHECK-NEXT: store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4 -; CHECK-NEXT: [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[X]]) -; CHECK-NEXT: store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 4 +; CHECK-NEXT: store float [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 4 ; CHECK-NEXT: ret void ; %minnum = call float @llvm.minnum.f32(float %x, float %x) @@ -660,11 +703,9 @@ define void @minmax_x_minmax_xy(<2 x float> %x, <2 x float> %y, ptr %minnum_res, ; CHECK-NEXT: [[MAXIMUM_XY:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X]], <2 x float> [[Y]]) ; CHECK-NEXT: store <2 x float> [[MAXIMUM_XY]], ptr [[MAXIMUM_RES:%.*]], align 8 ; CHECK-NEXT: [[MINIMUMNUM_XY:%.*]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[X]], <2 x float> [[Y]]) -; CHECK-NEXT: [[MINIMUMNUM_NESTED:%.*]] = call <2 x float> 
@llvm.minimumnum.v2f32(<2 x float> [[X]], <2 x float> [[MINIMUMNUM_XY]]) -; CHECK-NEXT: store <2 x float> [[MINIMUMNUM_NESTED]], ptr [[MINIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[MINIMUMNUM_XY]], ptr [[MINIMUMNUM_RES:%.*]], align 8 ; CHECK-NEXT: [[MAXIMUMNUM_XY:%.*]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[X]], <2 x float> [[Y]]) -; CHECK-NEXT: [[MAXIMUMNUM_NESTED:%.*]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[X]], <2 x float> [[MAXIMUMNUM_XY]]) -; CHECK-NEXT: store <2 x float> [[MAXIMUMNUM_NESTED]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[MAXIMUMNUM_XY]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 ; CHECK-NEXT: ret void ; %minnum_xy = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) @@ -758,13 +799,9 @@ define void @minmax_minmax_xy_minmax_yx(half %x, half %y, ptr %minnum_res, ptr % ; CHECK-NEXT: [[MAXIMUM_XY:%.*]] = call half @llvm.maximum.f16(half [[X]], half [[Y]]) ; CHECK-NEXT: store half [[MAXIMUM_XY]], ptr [[MAXIMUM_RES:%.*]], align 2 ; CHECK-NEXT: [[MINIMUMNUM_XY:%.*]] = call half @llvm.minimumnum.f16(half [[X]], half [[Y]]) -; CHECK-NEXT: [[MINIMUMNUM_YX:%.*]] = call half @llvm.minimumnum.f16(half [[Y]], half [[X]]) -; CHECK-NEXT: [[FINAL_MINIMUMNUM:%.*]] = call half @llvm.minimumnum.f16(half [[MINIMUMNUM_XY]], half [[MINIMUMNUM_YX]]) -; CHECK-NEXT: store half [[FINAL_MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 2 +; CHECK-NEXT: store half [[MINIMUMNUM_XY]], ptr [[MINIMUMNUM_RES:%.*]], align 2 ; CHECK-NEXT: [[MAXIMUMNUM_XY:%.*]] = call half @llvm.maximumnum.f16(half [[X]], half [[Y]]) -; CHECK-NEXT: [[MAXIMUMNUM_YX:%.*]] = call half @llvm.maximumnum.f16(half [[Y]], half [[X]]) -; CHECK-NEXT: [[FINAL_MAXIMUMNUM:%.*]] = call half @llvm.maximumnum.f16(half [[MAXIMUMNUM_XY]], half [[MAXIMUMNUM_YX]]) -; CHECK-NEXT: store half [[FINAL_MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 2 +; CHECK-NEXT: store half [[MAXIMUMNUM_XY]], ptr [[MAXIMUMNUM_RES:%.*]], align 2 ; 
CHECK-NEXT: ret void ; %minnum_xy = call half @llvm.minnum.f16(half %x, half %y) @@ -812,13 +849,9 @@ define void @minmax_minmax_xy_maxmin_yx(double %x, double %y, ptr %minnum_res, p ; CHECK-NEXT: [[MAXIMUM_XY:%.*]] = call double @llvm.maximum.f64(double [[Y]], double [[X]]) ; CHECK-NEXT: store double [[MAXIMUM_XY]], ptr [[MAXIMUM_RES:%.*]], align 8 ; CHECK-NEXT: [[MINIMUMNUM_XY:%.*]] = call double @llvm.minimumnum.f64(double [[Y]], double [[X]]) -; CHECK-NEXT: [[MAXIMUMNUM_XY:%.*]] = call double @llvm.maximumnum.f64(double [[X]], double [[Y]]) -; CHECK-NEXT: [[FINAL_MINIMUMNUM:%.*]] = call double @llvm.minimumnum.f64(double [[MINIMUMNUM_XY]], double [[MAXIMUMNUM_XY]]) -; CHECK-NEXT: store double [[FINAL_MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8 -; CHECK-NEXT: [[MAXIMUMNUM_XY1:%.*]] = call double @llvm.maximumnum.f64(double [[Y]], double [[X]]) -; CHECK-NEXT: [[MINIMUMNUM_YX:%.*]] = call double @llvm.minimumnum.f64(double [[X]], double [[Y]]) -; CHECK-NEXT: [[FINAL_MAXIMUMNUM:%.*]] = call double @llvm.maximumnum.f64(double [[MAXIMUMNUM_XY1]], double [[MINIMUMNUM_YX]]) -; CHECK-NEXT: store double [[FINAL_MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: store double [[MINIMUMNUM_XY]], ptr [[MINIMUMNUM_RES:%.*]], align 8 +; CHECK-NEXT: [[MAXIMUMNUM_XY:%.*]] = call double @llvm.maximumnum.f64(double [[Y]], double [[X]]) +; CHECK-NEXT: store double [[MAXIMUMNUM_XY]], ptr [[MAXIMUMNUM_RES:%.*]], align 8 ; CHECK-NEXT: ret void ; %minnum_xy = call double @llvm.minnum.f64(double %x, double %y)