Closed
Description
This is a thread for investigating changes to the LU defaults, based off of benchmarks like #356 .
(Note: there's a Mac-specific version 3 posts down)
using BenchmarkTools, Random, VectorizationBase
using LinearAlgebra, LinearSolve, MKL_jll
nc = min(Int(VectorizationBase.num_cores()), Threads.nthreads())
BLAS.set_num_threads(nc)
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.5
function luflop(m, n = m; innerflop = 2)
sum(1:min(m, n)) do k
invflop = 1
scaleflop = isempty((k + 1):m) ? 0 : sum((k + 1):m)
updateflop = isempty((k + 1):n) ? 0 :
sum((k + 1):n) do j
isempty((k + 1):m) ? 0 : sum((k + 1):m) do i
innerflop
end
end
invflop + scaleflop + updateflop
end
end
algs = [LUFactorization(), GenericLUFactorization(), RFLUFactorization(), MKLLUFactorization(), FastLUFactorization(), SimpleLUFactorization()]
res = [Float64[] for i in 1:length(algs)]
ns = 4:8:500
for i in 1:length(ns)
n = ns[i]
@info "$n × $n"
rng = MersenneTwister(123)
global A = rand(rng, n, n)
global b = rand(rng, n)
global u0= rand(rng, n)
for j in 1:length(algs)
bt = @belapsed solve(prob, $(algs[j])).u setup=(prob = LinearProblem(copy(A), copy(b); u0 = copy(u0), alias_A=true, alias_b=true))
push!(res[j], luflop(n) / bt / 1e9)
end
end
using Plots
__parameterless_type(T) = Base.typename(T).wrapper
parameterless_type(x) = __parameterless_type(typeof(x))
parameterless_type(::Type{T}) where {T} = __parameterless_type(T)
p = plot(ns, res[1]; ylabel = "GFLOPs", xlabel = "N", title = "GFLOPs for NxN LU Factorization", label = string(Symbol(parameterless_type(algs[1]))), legend=:outertopright)
for i in 2:length(res)
plot!(p, ns, res[i]; label = string(Symbol(parameterless_type(algs[i]))))
end
p
savefig("lubench.png")
savefig("lubench.pdf")
The justification for RecursiveFactorization.jl still looks very strong from the looks of this.
julia> versioninfo()
Julia Version 1.9.1
Commit 147bdf428c (2023-06-07 08:27 UTC)
Platform Info:
OS: Windows (x86_64-w64-mingw32)
CPU: 32 × AMD Ryzen 9 5950X 16-Core Processor
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-14.0.6 (ORCJIT, znver3)
Threads: 32 on 32 virtual cores
Environment:
JULIA_IMAGE_THREADS = 1
JULIA_EDITOR = code
JULIA_NUM_THREADS = 32
Needs examples on other systems.
Metadata
Metadata
Assignees
Labels
No labels