Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions base/floatfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -375,24 +375,31 @@ function fma_emulated(a::Float64, b::Float64,c::Float64)
return aandbfinite ? c : abhi+c
end
(iszero(a) || iszero(b)) && return abhi+c
bias = exponent(a) + exponent(b)
# The checks above satisfy exponent's nothrow precondition
bias = Math._exponent_finite_nonzero(a) + Math._exponent_finite_nonzero(b)
c_denorm = ldexp(c, -bias)
if isfinite(c_denorm)
# rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a))
issubnormal(a) && (a *= 0x1p52)
issubnormal(b) && (b *= 0x1p52)
a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000)
b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000)
a = reinterpret(Float64, (reinterpret(UInt64, a) & ~Base.exponent_mask(Float64)) | Base.exponent_one(Float64))
b = reinterpret(Float64, (reinterpret(UInt64, b) & ~Base.exponent_mask(Float64)) | Base.exponent_one(Float64))
c = c_denorm
abhi, ablo = twomul(a,b)
# abhi <= 4 -> isfinite(r) (α)
r = abhi+c
# s ≈ 0 (β)
s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo)
# α ⩓ β -> isfinite(sumhi) (γ)
sumhi = r+s
# If result is subnormal, ldexp will cause double rounding because subnormals have fewer mantisa bits.
# As such, we need to check whether round to even would lead to double rounding and manually round sumhi to avoid it.
if issubnormal(ldexp(sumhi, bias))
sumlo = r-sumhi+s
bits_lost = -bias-exponent(sumhi)-1022
# finite: See γ
# non-zero: If sumhi == ±0., then ldexp(sumhi, bias) == ±0,
# so we don't take this branch.
bits_lost = -bias-Math._exponent_finite_nonzero(sumhi)-1022
sumhiInt = reinterpret(UInt64, sumhi)
if (bits_lost != 1) ⊻ (sumhiInt&1 == 1)
sumhi = nextfloat(sumhi, cmp(sumlo,0))
Expand Down
39 changes: 33 additions & 6 deletions base/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export sin, cos, sincos, tan, sinh, cosh, tanh, asin, acos, atan,
import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
acos, atan, asinh, acosh, atanh, sqrt, log2, log10,
max, min, minmax, ^, exp2, muladd, rem,
exp10, expm1, log1p
exp10, expm1, log1p, @constprop

using .Base: sign_mask, exponent_mask, exponent_one,
exponent_half, uinttype, significand_mask,
Expand Down Expand Up @@ -784,7 +784,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
xu = reinterpret(Unsigned, x)
xs = xu & ~sign_mask(T)
xs >= exponent_mask(T) && return x # NaN or Inf
k = Int(xs >> significand_bits(T))
k = (xs >> significand_bits(T)) % Int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the difference between these?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Int throws if the sign bit is set and inference does not have any range modeling to know that that is impossible.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. That's somewhat unfortunate, but this is a good fix then.

if k == 0 # x is subnormal
xs == 0 && return x # +-0
m = leading_zeros(xs) - exponent_bits(T)
Expand Down Expand Up @@ -817,7 +817,8 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
return flipsign(T(0.0), x)
end
k += significand_bits(T)
z = T(2.0)^-significand_bits(T)
# z = T(2.0) ^ (-significand_bits(T))
z = reinterpret(T, rem(exponent_bias(T)-significand_bits(T), uinttype(T)) << significand_bits(T))
xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
return z*reinterpret(T, xu)
end
Expand All @@ -841,7 +842,7 @@ julia> exponent(16.0)
"""
function exponent(x::T) where T<:IEEEFloat
@noinline throw1(x) = throw(DomainError(x, "Cannot be NaN or Inf."))
@noinline throw2(x) = throw(DomainError(x, "Cannot be subnormal converted to 0."))
@noinline throw2(x) = throw(DomainError(x, "Cannot be ±0.0."))
xs = reinterpret(Unsigned, x) & ~sign_mask(T)
xs >= exponent_mask(T) && throw1(x)
k = Int(xs >> significand_bits(T))
Expand All @@ -853,6 +854,21 @@ function exponent(x::T) where T<:IEEEFloat
return k - exponent_bias(T)
end

# Like exponent, but assumes the nothrow precondition. For
# internal use only. Could be written as
# @assume_effects :nothrow exponent()
# but currently this form is easier on the compiler.
function _exponent_finite_nonzero(x::T) where T<:IEEEFloat
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This deserves a better name.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's exponent, but only for for finite and nonzero values. Seems perfectly descriptive to me.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

understood, but given that it's something that I will probably want to use, I think something like nothrow_exponent with documentation as to when it gives wrong answers would be better.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we don't have any precondition enforcement, I do like having the precondition in the name, so it's obvious to people reading it at the callsite what is being asserted here.

# @precond :nothrow !isnan(x) && !isinf(x) && !iszero(x)
xs = reinterpret(Unsigned, x) & ~sign_mask(T)
k = rem(xs >> significand_bits(T), Int)
if k == 0 # x is subnormal
m = leading_zeros(xs) - exponent_bits(T)
k = 1 - m
end
return k - exponent_bias(T)
end

"""
significand(x)

Expand Down Expand Up @@ -977,11 +993,17 @@ function modf(x::T) where T<:IEEEFloat
return (rx, ix)
end

function ^(x::Float64, y::Float64)
# @constprop aggressive to help the compiler see the switch between the integer and float
# variants for callers with constant `y`
@constprop :aggressive function ^(x::Float64, y::Float64)
yint = unsafe_trunc(Int, y) # Note, this is actually safe since julia freezes the result
y == yint && return x^yint
x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
x == 1 && return 1.0
return pow_body(x, y)
end

@inline function pow_body(x::Float64, y::Float64)
Comment on lines +1003 to +1006
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps worth putting this inside the function, so it is clearer the purpose of it?

Suggested change
return pow_body(x, y)
end
@inline function pow_body(x::Float64, y::Float64)
return (@inline function pow_body(x::Float64, y::Float64) ... end)(x, y)

!isfinite(x) && return x*(y>0 || isnan(x))
x==0 && return abs(y)*Inf*(!(y>0))
logxhi,logxlo = Base.Math._log_ext(x)
Expand All @@ -990,10 +1012,15 @@ function ^(x::Float64, y::Float64)
hi = xyhi+xylo
return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
end
function ^(x::T, y::T) where T <: Union{Float16, Float32}

@constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
yint = unsafe_trunc(Int64, y) # Note, this is actually safe since julia freezes the result
y == yint && return x^yint
x < 0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
return pow_body(x, y)
end

@inline function pow_body(x::T, y::T) where T <: Union{Float16, Float32}
x == 1 && return one(T)
!isfinite(x) && return x*(y>0 || isnan(x))
x==0 && return abs(y)*T(Inf)*(!(y>0))
Expand Down