From 622cd6d894f183a334eedfcfb293c6202be73f52 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Sat, 23 Oct 2021 23:05:26 -0400 Subject: [PATCH 01/12] put fma in the right spot --- base/floatfuncs.jl | 53 +++++++++++++++++++++++++++++++++++++++------- test/math.jl | 13 ++++++++++++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index bae27d642e7c7..9720eb4e1b8d4 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -342,30 +342,67 @@ significantly more expensive than `x*y+z`. `fma` is used to improve accuracy in algorithms. See [`muladd`](@ref). """ function fma end +function fma_emulated(a::Float32, b::Float32, c::Float32) + return Float32(Float64(a) * Float64(b) + Float64(c)) +end + +@inline function splitbits(x::Float64) + xhi = reinterpret(Float64, reinterpret(UInt64, x) & 0xfffffffffc000000) + xlo = x-xhi + return xhi,xlo +end + +@inline function twomul(a::Float64, b::Float64) + ahi, alo = splitbits(a) + bhi, blo = splitbits(b) + abhi = ahi*bhi + ablo = alo*blo + ahi*blo + alo*bhi + return abhi, ablo +end -fma_libm(x::Float32, y::Float32, z::Float32) = - ccall(("fmaf", libm_name), Float32, (Float32,Float32,Float32), x, y, z) -fma_libm(x::Float64, y::Float64, z::Float64) = - ccall(("fma", libm_name), Float64, (Float64,Float64,Float64), x, y, z) +function fma_emulated(a::Float64, b::Float64,c::Float64) + abhi, ablo = twomul(a,b) + abhitemp = abhi+ablo + signab = sign(abhi) + if abhitemp == signab*Inf #rethink our life choices + if sign(c) == signab || abhi == 0.0 + return signab * Inf + else #compute a*b+c = a*(b+c/a) + inva = inv(a) + aochi = c * inva + aoclo = fma_emulated(-aochi, a, c) * inva # Yes this is recursive. Bite me + aocpbhi = b + aochi + aocpblo = (b-aocpbhi+aochi+aoclo) + reshi,reslo = twomul(aocpbhi, a) + return reshi + (reslo+a*aocpblo) + end + end + ablo = abhitemp - abhi + r = abhi+c + s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) + zh = r+s + return zh +end fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z) fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z) # Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back -# onto a broken system libm; if so, use openlibm's fma instead +# onto a broken system libm; if so, use a software emulated fma # 1.0000305f0 = 1 + 1/2^15 # 1.0000000009313226 = 1 + 1/2^30 # If fma_llvm() clobbers the rounding mode, the result of 0.1 + 0.2 will be 0.3 # instead of the properly-rounded 0.30000000000000004; check after calling fma +# TODO actually detect fma in hardware and switch on that. if (Sys.ARCH !== :i686 && fma_llvm(1.0000305f0, 1.0000305f0, -1.0f0) == 6.103609f-5 && (fma_llvm(1.0000000009313226, 1.0000000009313226, -1.0) == 1.8626451500983188e-9) && 0.1 + 0.2 == 0.30000000000000004) fma(x::Float32, y::Float32, z::Float32) = fma_llvm(x,y,z) fma(x::Float64, y::Float64, z::Float64) = fma_llvm(x,y,z) else - fma(x::Float32, y::Float32, z::Float32) = fma_libm(x,y,z) - fma(x::Float64, y::Float64, z::Float64) = fma_libm(x,y,z) + fma(x::Float32, y::Float32, z::Float32) = fma_emulated(x,y,z) + fma(x::Float64, y::Float64, z::Float64) = fma_emulated(x,y,z) end function fma(a::Float16, b::Float16, c::Float16) - Float16(fma(Float32(a), Float32(b), Float32(c))) + Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it. end # This is necessary at least on 32-bit Intel Linux, since fma_llvm may diff --git a/test/math.jl b/test/math.jl index cdcc1c5c6a47d..78dc950a6aeee 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1286,3 +1286,16 @@ end @test_throws MethodError f(x) end end + +@testset "fma" begin + @test fma(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16 + @test fma(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7 + for T in (Float32, Float64) + @test fma(floatmax(T), T(2), -floatmax(T)) === floatmax(T) + @test fma(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) + @test isnan_type(T, fma(T(Inf), T(1), -T(Inf))) + @test isnan_type(T, fma(T(Inf), T(0), -T(0))) + end + @test fma(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 + @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 +end \ No newline at end of file From 3b2d796efb3c5d4f81d37db3d065150761dcdc6d Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Mon, 25 Oct 2021 13:15:14 -0400 Subject: [PATCH 02/12] working most of the time --- base/floatfuncs.jl | 24 ++++++++++-------------- test/math.jl | 30 ++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 9720eb4e1b8d4..9d5f69ccfcd1b 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -346,25 +346,22 @@ function fma_emulated(a::Float32, b::Float32, c::Float32) return Float32(Float64(a) * Float64(b) + Float64(c)) end -@inline function splitbits(x::Float64) - xhi = reinterpret(Float64, reinterpret(UInt64, x) & 0xfffffffffc000000) - xlo = x-xhi - return xhi,xlo -end - @inline function twomul(a::Float64, b::Float64) - ahi, alo = splitbits(a) - bhi, blo = splitbits(b) - abhi = ahi*bhi - ablo = alo*blo + ahi*blo + alo*bhi - return abhi, ablo + ahi = reinterpret(Float64, reinterpret(UInt64, a) & 0xffff_ffff_f800_0000) + alo = a-ahi + bhi = reinterpret(Float64, reinterpret(UInt64, b) & 0xffff_ffff_f800_0000) + blo = b-bhi + abhi = a*b + blohi = reinterpret(Float64, reinterpret(UInt64, blo) & 0xffff_ffff_f800_0000) + blolo = blo-blohi + ablo = alo*blohi - (((abhi - ahi*bhi) - alo*bhi) - ahi*blo) + blolo*alo + return abhi, ifelse(isfinite(abhi), ablo, abhi) end function fma_emulated(a::Float64, b::Float64,c::Float64) abhi, ablo = twomul(a,b) - abhitemp = abhi+ablo signab = sign(abhi) - if abhitemp == signab*Inf #rethink our life choices + if abhi == signab*Inf #rethink our life choices if sign(c) == signab || abhi == 0.0 return signab * Inf else #compute a*b+c = a*(b+c/a) @@ -377,7 +374,6 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) return reshi + (reslo+a*aocpblo) end end - ablo = abhitemp - abhi r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) zh = r+s diff --git a/test/math.jl b/test/math.jl index 78dc950a6aeee..f0e39e8a2cd67 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1288,14 +1288,24 @@ end end @testset "fma" begin - @test fma(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16 - @test fma(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7 - for T in (Float32, Float64) - @test fma(floatmax(T), T(2), -floatmax(T)) === floatmax(T) - @test fma(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) - @test isnan_type(T, fma(T(Inf), T(1), -T(Inf))) - @test isnan_type(T, fma(T(Inf), T(0), -T(0))) + for fma in (fma, Base.fma_emulated) + @test fma(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16 + @test fma(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7 + for T in (Float32, Float64) + @test fma(floatmax(T), T(2), -floatmax(T)) === floatmax(T) + @test fma(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) + @test fma(T(Inf), T(Inf), T(Inf)) === T(Inf) + @test isnan_type(T, fma(T(Inf), T(1), -T(Inf))) + @test isnan_type(T, fma(T(Inf), T(0), -T(0))) + + end + @test fma(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 + @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 + @test fma(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf + @test fma(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf + for _ in 1:2^18 + a, b, c = reinterpret.(Float64, rand(-4503599627370497:9218868437227405311, 3)) + @test Base.fma_emulated(a, b, c) === fma(a, b, c) || a,b,c + end end - @test fma(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 - @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 -end \ No newline at end of file +end From c82ef92d29c47d208648d099556e17fb79488b68 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Mon, 25 Oct 2021 17:14:35 -0400 Subject: [PATCH 03/12] stuff is broken --- base/floatfuncs.jl | 31 +++++++++++++++---------------- test/math.jl | 4 ++-- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 9d5f69ccfcd1b..bd734b98c8724 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -359,25 +359,24 @@ end end function fma_emulated(a::Float64, b::Float64,c::Float64) - abhi, ablo = twomul(a,b) - signab = sign(abhi) - if abhi == signab*Inf #rethink our life choices - if sign(c) == signab || abhi == 0.0 - return signab * Inf - else #compute a*b+c = a*(b+c/a) - inva = inv(a) - aochi = c * inva - aoclo = fma_emulated(-aochi, a, c) * inva # Yes this is recursive. Bite me - aocpbhi = b + aochi - aocpblo = (b-aocpbhi+aochi+aoclo) - reshi,reslo = twomul(aocpbhi, a) - return reshi + (reslo+a*aocpblo) - end + absab = abs(a*b) + if !isfinite(absab) || issubnormal(absab) || issubnormal(a) || issubnormal(b) || iszero(a) + !(isfinite(a) && isfinite(b) && isfinite(c)) && return a*b+c + iszero(a) || iszero(b) && return a*b+c + a_norm = reinterpret(Float64, reinterpret(UInt64, a) & 0xc00fffffffffffff) + b_norm = reinterpret(Float64, reinterpret(UInt64, b) & 0xc00fffffffffffff) + abhi, ablo = twomul(a_norm,b_norm) + bias = exponent(a) + exponent(b) + c_denorm = ldexp(c, bias) + isfinite(c_denorm) && return ldexp(fma_emulated(a_norm, b_norm, c_denorm), -bias) + c_denorm = ldexp(c, bias-54) + isfinite(c_denorm) && return ldexp(fma_emulated(ldexp(a_norm, -54), b_norm, c_denorm), bias+54) + return a*b+c end + abhi, ablo = twomul(a,b) r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) - zh = r+s - return zh + return r+s end fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z) fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z) diff --git a/test/math.jl b/test/math.jl index f0e39e8a2cd67..c1f7b3ced0a77 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1297,7 +1297,7 @@ end @test fma(T(Inf), T(Inf), T(Inf)) === T(Inf) @test isnan_type(T, fma(T(Inf), T(1), -T(Inf))) @test isnan_type(T, fma(T(Inf), T(0), -T(0))) - + @test fma(-zero(T), zero(T), -zero(T)) === -zero(T) end @test fma(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 @@ -1305,7 +1305,7 @@ end @test fma(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf for _ in 1:2^18 a, b, c = reinterpret.(Float64, rand(-4503599627370497:9218868437227405311, 3)) - @test Base.fma_emulated(a, b, c) === fma(a, b, c) || a,b,c + @test Base.fma_emulated(a, b, c) === fma(a, b, c) || (a,b,c) end end end From c762c9c125fd76f055d8dcfeaf55be1993345410 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Tue, 26 Oct 2021 17:45:13 -0400 Subject: [PATCH 04/12] fix Float64, found bug in Float32 --- base/floatfuncs.jl | 65 +++++++++++++++++++++++++++++++--------------- test/math.jl | 10 +++---- 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index bd734b98c8724..70d13d40e9678 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -343,37 +343,60 @@ algorithms. See [`muladd`](@ref). """ function fma end function fma_emulated(a::Float32, b::Float32, c::Float32) - return Float32(Float64(a) * Float64(b) + Float64(c)) + ab = Float64(a) * b + result = ab+c + # If converting to Float32 doesn't cause rounding problems + (reinterpret(UInt64, result)&0x0000_0000_1fff_ffff != 0x1000_0000) && isequal(ab, result-c) && return Float32(result) + return result #TODO fixed. +end + +""" Splits a Float64 into a hi bit and a low bit where the high bit has 27 trailing 0s and the low bit has 26 trailing 0s""" +@inline function splitbits(x::Float64) + hi = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000) + return hi, x-hi end @inline function twomul(a::Float64, b::Float64) - ahi = reinterpret(Float64, reinterpret(UInt64, a) & 0xffff_ffff_f800_0000) - alo = a-ahi - bhi = reinterpret(Float64, reinterpret(UInt64, b) & 0xffff_ffff_f800_0000) - blo = b-bhi + ahi, alo = splitbits(a) + bhi, blo = splitbits(b) abhi = a*b - blohi = reinterpret(Float64, reinterpret(UInt64, blo) & 0xffff_ffff_f800_0000) - blolo = blo-blohi + blohi, blolo = splitbits(blo) ablo = alo*blohi - (((abhi - ahi*bhi) - alo*bhi) - ahi*blo) + blolo*alo - return abhi, ifelse(isfinite(abhi), ablo, abhi) + return abhi, ablo end function fma_emulated(a::Float64, b::Float64,c::Float64) - absab = abs(a*b) - if !isfinite(absab) || issubnormal(absab) || issubnormal(a) || issubnormal(b) || iszero(a) - !(isfinite(a) && isfinite(b) && isfinite(c)) && return a*b+c - iszero(a) || iszero(b) && return a*b+c - a_norm = reinterpret(Float64, reinterpret(UInt64, a) & 0xc00fffffffffffff) - b_norm = reinterpret(Float64, reinterpret(UInt64, b) & 0xc00fffffffffffff) - abhi, ablo = twomul(a_norm,b_norm) + abhi, ablo = twomul(a,b) + if !isfinite(abhi+c) || issubnormal(ablo) || issubnormal(a) || issubnormal(b) + !(isfinite(abhi+c)) && return abhi+c + (iszero(a) || iszero(b)) && return abhi+c bias = exponent(a) + exponent(b) - c_denorm = ldexp(c, bias) - isfinite(c_denorm) && return ldexp(fma_emulated(a_norm, b_norm, c_denorm), -bias) - c_denorm = ldexp(c, bias-54) - isfinite(c_denorm) && return ldexp(fma_emulated(ldexp(a_norm, -54), b_norm, c_denorm), bias+54) - return a*b+c + c_denorm = ldexp(c, -bias) + if isfinite(c_denorm) + # rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a)) + issubnormal(a) && (a *= 0x1p52) + issubnormal(b) && (b *= 0x1p52) + a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000) + b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000) + c = c_denorm + abhi, ablo = twomul(a,b) + r = abhi+c + s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) + sumhi = r+s + if issubnormal(ldexp(sumhi, bias)) #black magic don't worry about it. + sumlo = sumhi-r-s + bits_lost = -bias-exponent(sumhi)-1022 + sumhiInt = reinterpret(UInt64, sumhi) + if sumlo != 0 && ((bits_lost != 1) ⊻ (sumhiInt&1 == 1)) + sumhi = nextfloat(sumhi, ifelse(signbit(sumhi)==signbit(sumlo), -1, 1)) + end + end + return ldexp(sumhi, bias) + end + isinf(abhi) && signbit(c) == signbit(a*b) && return abhi + # fall through end - abhi, ablo = twomul(a,b) + r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) return r+s diff --git a/test/math.jl b/test/math.jl index c1f7b3ced0a77..3e4f8e7d53a2e 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1291,21 +1291,21 @@ end for fma in (fma, Base.fma_emulated) @test fma(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16 @test fma(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7 - for T in (Float32, Float64) + @testset "$T" for T in (Float32, Float64) @test fma(floatmax(T), T(2), -floatmax(T)) === floatmax(T) @test fma(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) @test fma(T(Inf), T(Inf), T(Inf)) === T(Inf) @test isnan_type(T, fma(T(Inf), T(1), -T(Inf))) @test isnan_type(T, fma(T(Inf), T(0), -T(0))) @test fma(-zero(T), zero(T), -zero(T)) === -zero(T) + for _ in 1:2^18 + a, b, c = reinterpret.(T, rand(uinttype(T), 3)) + @test isequal(Base.fma_emulated(a, b, c), fma(a, b, c)) || (a,b,c) + end end @test fma(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 @test fma(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf @test fma(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf - for _ in 1:2^18 - a, b, c = reinterpret.(Float64, rand(-4503599627370497:9218868437227405311, 3)) - @test Base.fma_emulated(a, b, c) === fma(a, b, c) || (a,b,c) - end end end From 5091397e81da8ecf14b359eb92b543de32a39f8f Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Wed, 27 Oct 2021 10:57:45 -0400 Subject: [PATCH 05/12] fixed float32 --- base/floatfuncs.jl | 12 +++++++----- test/math.jl | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 70d13d40e9678..a3e0ab9c83cd6 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -342,12 +342,14 @@ significantly more expensive than `x*y+z`. `fma` is used to improve accuracy in algorithms. See [`muladd`](@ref). """ function fma end -function fma_emulated(a::Float32, b::Float32, c::Float32) +function fma_emulated(a::Float32, b::Float32, c::Float32)::Float32 ab = Float64(a) * b - result = ab+c - # If converting to Float32 doesn't cause rounding problems - (reinterpret(UInt64, result)&0x0000_0000_1fff_ffff != 0x1000_0000) && isequal(ab, result-c) && return Float32(result) - return result #TODO fixed. + res = ab+c + reinterpret(UInt64, res)&0x1fff_ffff!=0x1000_0000 && return res + # yes error compensation is necessary. It sucks + reslo = abs(c)>abs(ab) ? ab-(res - c) : c-(res - ab) + res = iszero(reslo) ? res : (signbit(reslo) ? prevfloat(res) : nextfloat(res)) + return res end """ Splits a Float64 into a hi bit and a low bit where the high bit has 27 trailing 0s and the low bit has 26 trailing 0s""" diff --git a/test/math.jl b/test/math.jl index 3e4f8e7d53a2e..e0c913e904aa6 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1307,5 +1307,6 @@ end @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 @test fma(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf @test fma(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf + @test fma(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6 end end From cd104897e8c6dc5ac91b5f0a3ebcb2fd9e06b2c7 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Wed, 27 Oct 2021 11:32:30 -0400 Subject: [PATCH 06/12] fix float64 --- base/floatfuncs.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index a3e0ab9c83cd6..eb7e6ee8b7edd 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -369,11 +369,13 @@ end function fma_emulated(a::Float64, b::Float64,c::Float64) abhi, ablo = twomul(a,b) - if !isfinite(abhi+c) || issubnormal(ablo) || issubnormal(a) || issubnormal(b) - !(isfinite(abhi+c)) && return abhi+c + #display(twomul(a,b)) + if !isfinite(abhi+c) || isless(abs(abhi), nextfloat(0x1p-969)) || issubnormal(a) || issubnormal(b) + isfinite(abhi+c) || return abhi+c (iszero(a) || iszero(b)) && return abhi+c bias = exponent(a) + exponent(b) c_denorm = ldexp(c, -bias) + #display((a,b,c_denorm)) if isfinite(c_denorm) # rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a)) issubnormal(a) && (a *= 0x1p52) @@ -381,6 +383,7 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000) b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000) c = c_denorm + #display((a,b,c)) abhi, ablo = twomul(a,b) r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) @@ -398,7 +401,6 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) isinf(abhi) && signbit(c) == signbit(a*b) && return abhi # fall through end - r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) return r+s From 28dc996a81a6c51ce8b3737ed1b27d5b31eed82c Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Thu, 28 Oct 2021 12:19:53 -0400 Subject: [PATCH 07/12] fix test --- test/math.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/math.jl b/test/math.jl index e0c913e904aa6..501e270e9445a 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1299,7 +1299,7 @@ end @test isnan_type(T, fma(T(Inf), T(0), -T(0))) @test fma(-zero(T), zero(T), -zero(T)) === -zero(T) for _ in 1:2^18 - a, b, c = reinterpret.(T, rand(uinttype(T), 3)) + a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3)) @test isequal(Base.fma_emulated(a, b, c), fma(a, b, c)) || (a,b,c) end end From 5a2f20f2c3779cd57f6e2e23693348ec8989f307 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Tue, 2 Nov 2021 17:43:43 -0400 Subject: [PATCH 08/12] just before I give up --- base/floatfuncs.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index eb7e6ee8b7edd..b6d011b7d81a6 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -369,13 +369,11 @@ end function fma_emulated(a::Float64, b::Float64,c::Float64) abhi, ablo = twomul(a,b) - #display(twomul(a,b)) if !isfinite(abhi+c) || isless(abs(abhi), nextfloat(0x1p-969)) || issubnormal(a) || issubnormal(b) - isfinite(abhi+c) || return abhi+c + (isfinite(a) && isfinite(b) && isfinite(c)) || return abhi+c (iszero(a) || iszero(b)) && return abhi+c bias = exponent(a) + exponent(b) c_denorm = ldexp(c, -bias) - #display((a,b,c_denorm)) if isfinite(c_denorm) # rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a)) issubnormal(a) && (a *= 0x1p52) @@ -383,17 +381,18 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000) b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000) c = c_denorm - #display((a,b,c)) + display((a,b,c)) abhi, ablo = twomul(a,b) r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) sumhi = r+s if issubnormal(ldexp(sumhi, bias)) #black magic don't worry about it. - sumlo = sumhi-r-s + sumlo = r-sumhi+s bits_lost = -bias-exponent(sumhi)-1022 sumhiInt = reinterpret(UInt64, sumhi) + display((sumhi, sumlo, bits_lost, sumhiInt, bias)) if sumlo != 0 && ((bits_lost != 1) ⊻ (sumhiInt&1 == 1)) - sumhi = nextfloat(sumhi, ifelse(signbit(sumhi)==signbit(sumlo), -1, 1)) + sumhi = nextfloat(sumhi, ifelse(signbit(sumhi)==signbit(sumlo), 1, -1)) end end return ldexp(sumhi, bias) From 367e920a78c25d5eb9430751d984aafcb2a1f213 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Tue, 2 Nov 2021 17:48:54 -0400 Subject: [PATCH 09/12] I gave up --- base/floatfuncs.jl | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index b6d011b7d81a6..0bba3dc2dd684 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -372,6 +372,9 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) if !isfinite(abhi+c) || isless(abs(abhi), nextfloat(0x1p-969)) || issubnormal(a) || issubnormal(b) (isfinite(a) && isfinite(b) && isfinite(c)) || return abhi+c (iszero(a) || iszero(b)) && return abhi+c + if issubnormal(abhi+c) #todo optimize + return Float64(big(a)*b+c) + end bias = exponent(a) + exponent(b) c_denorm = ldexp(c, -bias) if isfinite(c_denorm) @@ -381,20 +384,10 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) a = reinterpret(Float64, (reinterpret(UInt64, a) & 0x800fffffffffffff) | 0x3ff0000000000000) b = reinterpret(Float64, (reinterpret(UInt64, b) & 0x800fffffffffffff) | 0x3ff0000000000000) c = c_denorm - display((a,b,c)) abhi, ablo = twomul(a,b) r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) sumhi = r+s - if issubnormal(ldexp(sumhi, bias)) #black magic don't worry about it. - sumlo = r-sumhi+s - bits_lost = -bias-exponent(sumhi)-1022 - sumhiInt = reinterpret(UInt64, sumhi) - display((sumhi, sumlo, bits_lost, sumhiInt, bias)) - if sumlo != 0 && ((bits_lost != 1) ⊻ (sumhiInt&1 == 1)) - sumhi = nextfloat(sumhi, ifelse(signbit(sumhi)==signbit(sumlo), 1, -1)) - end - end return ldexp(sumhi, bias) end isinf(abhi) && signbit(c) == signbit(a*b) && return abhi From c7a3192a7d7674b695e7299b69f545c01093e8ed Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Wed, 3 Nov 2021 10:20:09 -0400 Subject: [PATCH 10/12] everything works no bigfloat hacks needed! --- base/floatfuncs.jl | 11 ++++++++--- test/math.jl | 30 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 0bba3dc2dd684..824285a4cdfe3 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -372,9 +372,6 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) if !isfinite(abhi+c) || isless(abs(abhi), nextfloat(0x1p-969)) || issubnormal(a) || issubnormal(b) (isfinite(a) && isfinite(b) && isfinite(c)) || return abhi+c (iszero(a) || iszero(b)) && return abhi+c - if issubnormal(abhi+c) #todo optimize - return Float64(big(a)*b+c) - end bias = exponent(a) + exponent(b) c_denorm = ldexp(c, -bias) if isfinite(c_denorm) @@ -388,6 +385,14 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) sumhi = r+s + if issubnormal(ldexp(sumhi, bias)) #black magic don't worry about it. + sumlo = r-sumhi+s + bits_lost = -bias-exponent(sumhi)-1022 + sumhiInt = reinterpret(UInt64, sumhi) + if (bits_lost != 1) ⊻ (sumhiInt&1 == 1) + sumhi = nextfloat(sumhi, cmp(sumlo,0)) + end + end return ldexp(sumhi, bias) end isinf(abhi) && signbit(c) == signbit(a*b) && return abhi diff --git a/test/math.jl b/test/math.jl index 501e270e9445a..f935156a029f5 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1288,25 +1288,25 @@ end end @testset "fma" begin - for fma in (fma, Base.fma_emulated) - @test fma(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16 - @test fma(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7 + for func in (fma, Base.fma_emulated) + @test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16 + @test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7 @testset "$T" for T in (Float32, Float64) - @test fma(floatmax(T), T(2), -floatmax(T)) === floatmax(T) - @test fma(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) - @test fma(T(Inf), T(Inf), T(Inf)) === T(Inf) - @test isnan_type(T, fma(T(Inf), T(1), -T(Inf))) - @test isnan_type(T, fma(T(Inf), T(0), -T(0))) - @test fma(-zero(T), zero(T), -zero(T)) === -zero(T) + @test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T) + @test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf) + @test func(T(Inf), T(Inf), T(Inf)) === T(Inf) + @test isnan_type(T, func(T(Inf), T(1), -T(Inf))) + @test isnan_type(T, func(T(Inf), T(0), -T(0))) + @test func(-zero(T), zero(T), -zero(T)) === -zero(T) for _ in 1:2^18 a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3)) - @test isequal(Base.fma_emulated(a, b, c), fma(a, b, c)) || (a,b,c) + @test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c) end end - @test fma(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 - @test fma(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 - @test fma(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf - @test fma(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf - @test fma(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6 + @test fmafuncfloatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 + @test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 + @test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf + @test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf + @test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6 end end From b82ef4adbd9a5a7676d4787e3203581ab4a12135 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Wed, 3 Nov 2021 10:47:22 -0400 Subject: [PATCH 11/12] oops fixed tests --- test/math.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/math.jl b/test/math.jl index f935156a029f5..da027fa8919f4 100644 --- a/test/math.jl +++ b/test/math.jl @@ -1303,7 +1303,7 @@ end @test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c) end end - @test fmafuncfloatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 + @test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292 @test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31 @test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf @test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf From 892c1639b3b57b1da1d14735d719fa257303f818 Mon Sep 17 00:00:00 2001 From: oscarddssmith Date: Wed, 3 Nov 2021 13:11:32 -0400 Subject: [PATCH 12/12] make comment more explanatory --- base/floatfuncs.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index 824285a4cdfe3..4c2f03d304f81 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -385,7 +385,9 @@ function fma_emulated(a::Float64, b::Float64,c::Float64) r = abhi+c s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo) sumhi = r+s - if issubnormal(ldexp(sumhi, bias)) #black magic don't worry about it. + # If result is subnormal, ldexp will cause double rounding because subnormals have fewer mantisa bits. + # As such, we need to check whether round to even would lead to double rounding and manually round sumhi to avoid it. + if issubnormal(ldexp(sumhi, bias)) sumlo = r-sumhi+s bits_lost = -bias-exponent(sumhi)-1022 sumhiInt = reinterpret(UInt64, sumhi)