Skip to content

Commit 817f18f

Browse files
committed
Optimize multiplication for Normed
This adds `wrapping_mul`, `saturating_mul` and `checked_mul` binary operations. However, this does not specialize them for `Fixed` and does not change `*` for `Fixed`. This replaces most of Normed's implementation of multiplication with integer operations. This improves the speed in many cases and the accuracy in some cases.
1 parent 5dcaf97 commit 817f18f

File tree

4 files changed

+101
-6
lines changed

4 files changed

+101
-6
lines changed

src/FixedPointNumbers.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ float(x::FixedPoint) = convert(floattype(x), x)
189189
# Negate the raw value `x.i` and rebuild an `X` from it with the two-argument constructor.
function wrapping_neg(x::X) where {X <: FixedPoint}
    negated = -x.i
    return X(negated, 0)
end
190190
# Add the raw values of `x` and `y` and rebuild an `X` from the raw sum.
function wrapping_add(x::X, y::X) where {X <: FixedPoint}
    raw_sum = x.i + y.i
    return X(raw_sum, 0)
end
191191
# Subtract the raw value of `y` from that of `x` and rebuild an `X` from the raw difference.
function wrapping_sub(x::X, y::X) where {X <: FixedPoint}
    raw_diff = x.i - y.i
    return X(raw_diff, 0)
end
192+
# Generic fallback: multiply in floating point, then reduce the product into `X` with `%`.
function wrapping_mul(x::X, y::X) where {X <: FixedPoint}
    product = float(x) * float(y)
    return product % X
end
192193

193194
# saturating arithmetic
194195
# Saturating negation built from a bitwise complement of the raw value.
# NOTE(review): assuming `x.i` is a signed two's-complement integer, `~(x.i - 1)` equals
# `-x.i`; when `x.i - true` wraps around (at `typemin`), `min` picks `x.i` itself and the
# complement gives `typemax` rather than wrapping back to `typemin` - confirm against the
# unsigned specialization, which is not visible in this hunk.
saturating_neg(x::X) where {X <: FixedPoint} = X(~min(x.i - true, x.i), 0)
@@ -202,6 +203,8 @@ saturating_sub(x::X, y::X) where {X <: FixedPoint} =
202203
X(x.i - ifelse(x.i < 0, min(y.i, x.i - typemin(x.i)), max(y.i, x.i - typemax(x.i))), 0)
203204
# Unsigned raw type: never subtract more than `x.i` itself, so the raw difference
# cannot drop below zero.
function saturating_sub(x::X, y::X) where {X <: FixedPoint{<:Unsigned}}
    subtrahend = min(x.i, y.i)
    return X(x.i - subtrahend, 0)
end
204205

206+
# Generic fallback: multiply in floating point, then clamp the product into `X`.
function saturating_mul(x::X, y::X) where {X <: FixedPoint}
    product = float(x) * float(y)
    return clamp(product, X)
end
207+
205208
# checked arithmetic
206209
# Checked negation expressed as the checked subtraction `0 - x`.
function checked_neg(x::X) where {X <: FixedPoint}
    origin = zero(X)
    return checked_sub(origin, x)
end
207210
function checked_add(x::X, y::X) where {X <: FixedPoint}
@@ -216,6 +219,7 @@ function checked_sub(x::X, y::X) where {X <: FixedPoint}
216219
f && throw_overflowerror(:-, x, y)
217220
z
218221
end
222+
# Generic fallback: multiply in floating point and hand the product to the `X` constructor.
function checked_mul(x::X, y::X) where {X <: FixedPoint}
    product = float(x) * float(y)
    return X(product)
end
219223

220224
# default arithmetic
221225
const DEFAULT_ARITHMETIC = :wrapping
@@ -226,7 +230,7 @@ for (op, name) in ((:-, :neg), )
226230
$op(x::X) where {X <: FixedPoint} = $f(x)
227231
end
228232
end
229-
for (op, name) in ((:+, :add), (:-, :sub))
233+
for (op, name) in ((:+, :add), (:-, :sub), (:*, :mul))
230234
f = Symbol(DEFAULT_ARITHMETIC, :_, name)
231235
@eval begin
232236
$op(x::X, y::X) where {X <: FixedPoint} = $f(x, y)

src/normed.jl

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ function rem(x::Float64, ::Type{N}) where {f, N <: Normed{UInt64,f}}
127127
reinterpret(N, r << UInt8(f - 53) - unsigned(signed(r) >> 0x35))
128128
end
129129

130-
131130
function (::Type{T})(x::Normed) where {T <: AbstractFloat}
132131
# The following optimization for constant division may cause rounding errors.
133132
# y = reinterpret(x)*(one(rawtype(x))/convert(T, rawone(x)))
@@ -248,8 +247,45 @@ Base.BigFloat(x::Normed) = reinterpret(x) / BigFloat(rawone(x))
248247

249248
# Exact rational value of a `Normed`: the raw integer over the raw representation of one.
function Base.Rational(x::Normed)
    numerator_raw = reinterpret(x)
    denominator_raw = rawone(x)
    return numerator_raw // denominator_raw
end
250249

251-
# unchecked arithmetic
252-
*(x::T, y::T) where {T <: Normed} = convert(T,convert(floattype(T), x)*convert(floattype(T), y))
250+
# Division by `2^f-1` with RoundNearest. The result would be in the lower half bits.
251+
# Generic path: add `2^(f-1) - 1` (half of the divisor, rounded down) to `x`, then
# integer-divide by `2^f - 1`.
function div_2fm1(x::T, ::Val{f}) where {T, f}
    unit = T(1)
    divisor = (unit << f) - 0x1
    half = (unit << (f - 1)) - 0x1
    return div(x + half, divisor)
end
252+
# With `f == 1` the divisor `2^1 - 1` is 1, so `x` is returned unchanged.
function div_2fm1(x::T, ::Val{1}) where {T}
    return x
end
253+
# Shift-and-add form of the rounded division by 255 for a 16-bit argument.
function div_2fm1(x::UInt16, ::Val{8})
    biased = x + 0x80
    return (biased + (biased >> 0x8)) >> 0x8
end
254+
# Shift-and-add form of the rounded division by 65535 for a 32-bit argument.
function div_2fm1(x::UInt32, ::Val{16})
    biased = x + 0x8000
    return (biased + (biased >> 0x10)) >> 0x10
end
255+
# Shift-and-add form of the rounded division by `2^32 - 1` for a 64-bit argument.
function div_2fm1(x::UInt64, ::Val{32})
    biased = x + 0x80000000
    return (biased + (biased >> 0x20)) >> 0x20
end
256+
# Shift-and-add form of the rounded division by `2^64 - 1`.
# The argument is the widened product of two 64-bit raw values, so it must be `UInt128`:
# with a `UInt64` argument this method would never match `widemul(::UInt64, ::UInt64)`,
# and `>> 0x40` on a 64-bit value would always yield zero.
div_2fm1(x::UInt128, ::Val{64}) = (((x + 0x8000000000000000) >> 0x40) + x + 0x8000000000000000) >> 0x40
257+
258+
# wrapping arithmetic
259+
function wrapping_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}}
260+
z = widemul(x.i, y.i)
261+
N(div_2fm1(z, Val(Int(f))) % T, 0)
262+
end
263+
264+
# saturating arithmetic
265+
function saturating_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}}
266+
f == bitwidth(T) && return wrapping_mul(x, y)
267+
z = min(widemul(x.i, y.i), widemul(typemax(N).i, rawone(N)))
268+
N(div_2fm1(z, Val(Int(f))) % T, 0)
269+
end
270+
271+
# checked arithmetic
272+
function checked_mul(x::N, y::N) where {N <: Normed}
273+
z = float(x) * float(y)
274+
z < typemax(N) + eps(N)/2 || throw_overflowerror(:*, x, y)
275+
z % N
276+
end
277+
function checked_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}}
278+
f == bitwidth(T) && return wrapping_mul(x, y)
279+
z = widemul(x.i, y.i)
280+
m = widemul(typemax(N).i, rawone(N)) + (rawone(N) >> 0x1)
281+
z < m || throw_overflowerror(:*, x, y)
282+
N(div_2fm1(z, Val(Int(f))) % T, 0)
283+
end
284+
285+
# TODO: decide the default arithmetic for `Normed` mul
286+
# Override the default arithmetic with `checked` for backward compatibility
287+
# `*` on two `Normed` values of the same type forwards to the checked multiplication.
function *(x::N, y::N) where {N <: Normed}
    return checked_mul(x, y)
end
288+
253289
# Divide in the floating-point type associated with `T`, then convert the quotient back.
function /(x::T, y::T) where {T <: Normed}
    F = floattype(T)
    quotient = convert(F, x) / convert(F, y)
    return convert(T, quotient)
end
254290

255291
# Functions

test/fixed.jl

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,12 +343,38 @@ end
343343
xys = ((x, y) for x in xs, y in xs)
344344
fsub(x, y) = float(x) - float(y)
345345
@test all(((x, y),) -> wrapping_add(wrapping_sub(x, y), y) === x, xys)
346-
@test all(((x, y),) -> saturating_sub(x, y) == clamp(fsub(x, y), F), xys)
347-
@test all(((x, y),) -> !(typemin(F) < fsub(x, y) < typemax(F)) ||
346+
@test all(((x, y),) -> saturating_sub(x, y) === clamp(fsub(x, y), F), xys)
347+
@test all(((x, y),) -> !(typemin(F) <= fsub(x, y) <= typemax(F)) ||
348348
wrapping_sub(x, y) === checked_sub(x, y) === fsub(x, y) % F, xys)
349349
end
350350
end
351351

352+
@testset "mul" begin
353+
for F in target(Fixed; ex = :thin)
354+
@test wrapping_mul(typemax(F), zero(F)) === zero(F)
355+
@test saturating_mul(typemax(F), zero(F)) === zero(F)
356+
@test checked_mul(typemax(F), zero(F)) === zero(F)
357+
358+
@test wrapping_mul(F(-1), typemax(F)) === -typemax(F)
359+
@test saturating_mul(F(-1), typemax(F)) === -typemax(F)
360+
@test checked_mul(F(-1), typemax(F)) === -typemax(F)
361+
362+
# FIXME: Both the rhs and lhs of the following test may be inaccurate.
363+
@test_skip wrapping_mul(typemin(F), typemax(F)) === big(typemin(F)) * big(typemax(F)) % F
364+
@test saturating_mul(typemin(F), typemax(F)) === typemin(F)
365+
@test_throws Exception checked_mul(typemin(F), typemax(F)) # TODO: Exception -> OverflowError
366+
end
367+
for F in target(Fixed, :i8; ex = :thin)
368+
xs = typemin(F):eps(F):typemax(F)
369+
xys = ((x, y) for x in xs, y in xs)
370+
fmul(x, y) = float(x) * float(y) # note that precision(Float32) < 32
371+
@test all(((x, y),) -> wrapping_mul(x, y) === fmul(x, y) % F, xys)
372+
@test all(((x, y),) -> saturating_mul(x, y) === clamp(fmul(x, y), F), xys)
373+
@test all(((x, y),) -> !(typemin(F) <= fmul(x, y) <= typemax(F)) ||
374+
wrapping_mul(x, y) === checked_mul(x, y), xys)
375+
end
376+
end
377+
352378
@testset "rounding" begin
353379
for sym in (:i8, :i16, :i32, :i64)
354380
T = symbol_to_inttype(Fixed, sym)

test/normed.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,35 @@ end
374374
end
375375
end
376376

377+
@testset "mul" begin
378+
for N in target(Normed; ex = :thin)
379+
@test wrapping_mul(typemax(N), zero(N)) === zero(N)
380+
@test saturating_mul(typemax(N), zero(N)) === zero(N)
381+
@test checked_mul(typemax(N), zero(N)) === zero(N)
382+
383+
@test wrapping_mul(one(N), typemax(N)) === typemax(N)
384+
@test saturating_mul(one(N), typemax(N)) === typemax(N)
385+
@test checked_mul(one(N), typemax(N)) === typemax(N)
386+
387+
@test wrapping_mul(typemax(N), typemax(N)) === big(typemax(N))^2 % N
388+
@test saturating_mul(typemax(N), typemax(N)) === typemax(N)
389+
if typemax(N) == 1
390+
@test checked_mul(typemax(N), typemax(N)) === typemax(N)
391+
else
392+
@test_throws OverflowError checked_mul(typemax(N), typemax(N))
393+
end
394+
end
395+
for N in target(Normed, :i8; ex = :thin)
396+
xs = typemin(N):eps(N):typemax(N)
397+
xys = ((x, y) for x in xs, y in xs)
398+
fmul(x, y) = float(x) * float(y) # note that precision(Float32) < 32
399+
@test all(((x, y),) -> wrapping_mul(x, y) === fmul(x, y) % N, xys)
400+
@test all(((x, y),) -> saturating_mul(x, y) === clamp(fmul(x, y), N), xys)
401+
@test all(((x, y),) -> !(typemin(N) <= fmul(x, y) <= typemax(N)) ||
402+
wrapping_mul(x, y) === checked_mul(x, y), xys)
403+
end
404+
end
405+
377406
@testset "div/fld1" begin
378407
@test div(reinterpret(N0f8, 0x10), reinterpret(N0f8, 0x02)) == fld(reinterpret(N0f8, 0x10), reinterpret(N0f8, 0x02)) == 8
379408
@test div(reinterpret(N0f8, 0x0f), reinterpret(N0f8, 0x02)) == fld(reinterpret(N0f8, 0x0f), reinterpret(N0f8, 0x02)) == 7

0 commit comments

Comments
 (0)