From 3fe529569151b950628cd0a70058fc1879cb5d17 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 22 Jan 2022 23:08:13 -0500 Subject: [PATCH 1/3] ldexp: Break inference loop We have an inference loop fma_emulated -> ldexp -> ^(::Float64, ::Int) -> fma -> fma_emulated. The arguments to `^` are constant, so constprop will figure it out, but it does require a bunch of extra processing. There is a simpler way to write this using elementary bit operations. Since resolving the inference loop requires constprop, this was breaking #43852. That is fixable, but I think we should also make this change to avoid having an unnecessary inference loop in our basic math functions, which will make future analyses easier. --- base/math.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/math.jl b/base/math.jl index 1206ac87bb254..3c16943290cc3 100644 --- a/base/math.jl +++ b/base/math.jl @@ -784,7 +784,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat xu = reinterpret(Unsigned, x) xs = xu & ~sign_mask(T) xs >= exponent_mask(T) && return x # NaN or Inf - k = Int(xs >> significand_bits(T)) + k = (xs >> significand_bits(T)) % Int if k == 0 # x is subnormal xs == 0 && return x # +-0 m = leading_zeros(xs) - exponent_bits(T) @@ -817,7 +817,8 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat return flipsign(T(0.0), x) end k += significand_bits(T) - z = T(2.0)^-significand_bits(T) + # z = T(2.0) ^ (-significand_bits(T)) + z = reinterpret(T, rem(exponent_bias(T)-significand_bits(T), uinttype(T)) << significand_bits(T)) xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T)) return z*reinterpret(T, xu) end From 16887888d6110e24d1165702a908b9e0f740e97d Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 23 Jan 2022 17:27:05 -0500 Subject: [PATCH 2/3] Make fma_emulated easier for the compiler to reason about The fact that the `exponent` call in `fma_emulated` requires reasoning about the ranges of the floating point values in question, which the compiler is not capable of doing (and is unlikely to ever do automatically). Thus, in order for the compiler to know that `fma_emulated` (and by extension `fma`) is :nothrow in a post-#43852 world, create a separate version of the `exponent` function that assumes its precondition. We could use `@assume_effects` instead, but this version is currently slightly easier on the compiler. --- base/floatfuncs.jl | 15 +++++++++++---- base/math.jl | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index b22dd7c238aae..3963927f7e717 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -375,24 +375,31 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) return aandbfinite ? c : abhi+c end (iszero(a) || iszero(b)) && return abhi+c - bias = exponent(a) + exponent(b) + # The checks above satisfy exponent's nothrow precondition + bias = Math._exponent_finite_nonzero(a) + Math._exponent_finite_nonzero(b) c_denorm = ldexp(c, -bias) if isfinite(c_denorm) # rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a)) issubnormal(a) && (a *= 0x1p52) issubnormal(b) && (b *= 0x1p52) - a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000) - b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000) + a = reinterpret(Float64, (reinterpret(UInt64, a) & ~Base.exponent_mask(Float64)) | Base.exponent_one(Float64)) + b = reinterpret(Float64, (reinterpret(UInt64, b) & ~Base.exponent_mask(Float64)) | Base.exponent_one(Float64)) c = c_denorm abhi, ablo = twomul(a,b) + # abhi <= 4 -> isfinite(r) (α) r = abhi+c + # s ≈ 0 (β) s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) + # α ⩓ β -> isfinite(sumhi) (γ) sumhi = r+s # If result is subnormal, ldexp will cause double rounding because subnormals have fewer mantisa bits. # As such, we need to check whether round to even would lead to double rounding and manually round sumhi to avoid it. if issubnormal(ldexp(sumhi, bias)) sumlo = r-sumhi+s - bits_lost = -bias-exponent(sumhi)-1022 + # finite: See γ + # non-zero: If sumhi == ±0., then ldexp(sumhi, bias) == ±0, + # so we don't take this branch. + bits_lost = -bias-Math._exponent_finite_nonzero(sumhi)-1022 sumhiInt = reinterpret(UInt64, sumhi) if (bits_lost != 1) ⊻ (sumhiInt&1 == 1) sumhi = nextfloat(sumhi, cmp(sumlo,0)) diff --git a/base/math.jl b/base/math.jl index 3c16943290cc3..92305365ba1e0 100644 --- a/base/math.jl +++ b/base/math.jl @@ -842,7 +842,7 @@ julia> exponent(16.0) """ function exponent(x::T) where T<:IEEEFloat @noinline throw1(x) = throw(DomainError(x, "Cannot be NaN or Inf.")) - @noinline throw2(x) = throw(DomainError(x, "Cannot be subnormal converted to 0.")) + @noinline throw2(x) = throw(DomainError(x, "Cannot be ±0.0.")) xs = reinterpret(Unsigned, x) & ~sign_mask(T) xs >= exponent_mask(T) && throw1(x) k = Int(xs >> significand_bits(T)) @@ -854,6 +854,21 @@ function exponent(x::T) where T<:IEEEFloat return k - exponent_bias(T) end +# Like exponent, but assumes the nothrow precondition. For +# internal use only. Could be written as +# @assume_effects :nothrow exponent() +# but currently this form is easier on the compiler. +function _exponent_finite_nonzero(x::T) where T<:IEEEFloat + # @precond :nothrow !isnan(x) && !isinf(x) && !iszero(x) + xs = reinterpret(Unsigned, x) & ~sign_mask(T) + k = rem(xs >> significand_bits(T), Int) + if k == 0 # x is subnormal + m = leading_zeros(xs) - exponent_bits(T) + k = 1 - m + end + return k - exponent_bias(T) +end + """ significand(x) From 4480b2ddd5cd4e10141895ade0ec9016fb715ea8 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 23 Jan 2022 20:40:37 -0500 Subject: [PATCH 3/3] pow: Make integer vs float branch obvious to constprop The integer branch is nothrow, so if the caller does something like `^(x::Float64, 2.0)`, we'd like to discover that. --- base/math.jl | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/base/math.jl b/base/math.jl index 92305365ba1e0..e94c6da8ef507 100644 --- a/base/math.jl +++ b/base/math.jl @@ -18,7 +18,7 @@ export sin, cos, sincos, tan, sinh, cosh, tanh, asin, acos, atan, import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin, acos, atan, asinh, acosh, atanh, sqrt, log2, log10, max, min, minmax, ^, exp2, muladd, rem, - exp10, expm1, log1p + exp10, expm1, log1p, @constprop using .Base: sign_mask, exponent_mask, exponent_one, exponent_half, uinttype, significand_mask, @@ -993,11 +993,17 @@ function modf(x::T) where T<:IEEEFloat return (rx, ix) end -function ^(x::Float64, y::Float64) +# @constprop aggressive to help the compiler see the switch between the integer and float +# variants for callers with constant `y` +@constprop :aggressive function ^(x::Float64, y::Float64) yint = unsafe_trunc(Int, y) # Note, this is actually safe since julia freezes the result y == yint && return x^yint x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer x == 1 && return 1.0 + return pow_body(x, y) +end + +@inline function pow_body(x::Float64, y::Float64) !isfinite(x) && return x*(y>0 || isnan(x)) x==0 && return abs(y)*Inf*(!(y>0)) logxhi,logxlo = Base.Math._log_ext(x) @@ -1006,10 +1012,15 @@ function ^(x::Float64, y::Float64) hi = xyhi+xylo return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ)) end -function ^(x::T, y::T) where T <: Union{Float16, Float32} + +@constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32} yint = unsafe_trunc(Int64, y) # Note, this is actually safe since julia freezes the result y == yint && return x^yint x < 0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer + return pow_body(x, y) +end + +@inline function pow_body(x::T, y::T) where T <: Union{Float16, Float32} x == 1 && return one(T) !isfinite(x) && return x*(y>0 || isnan(x)) x==0 && return abs(y)*T(Inf)*(!(y>0))