-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Closed
Labels
GC (Garbage collector) · compiler:simd (instruction-level vectorization) · performance (Must go faster)
Description
"""
    Wrapper{T} <: DenseVector{T}

Immutable one-dimensional dense array type whose single field `data` holds the
underlying `Vector{T}`.
"""
struct Wrapper{T} <: DenseArray{T,1}
    data::Vector{T}
end
"""
    length(w::Wrapper)

Return the number of elements of the wrapped `data` vector.
"""
function Base.length(w::Wrapper)
    return length(w.data)
end
"""
    size(w::Wrapper)

Return the size tuple of the wrapped `data` vector.
"""
function Base.size(w::Wrapper)
    return size(w.data)
end
"""
    unsafe_convert(::Type{Ptr{T}}, w::Wrapper{T}) where {T}

Return the `Ptr{T}` obtained by forwarding the conversion to the wrapped `data`
vector. The pointer is only valid while `w` is kept alive by the caller.
"""
function Base.unsafe_convert(::Type{Ptr{T}}, w::Wrapper{T}) where {T}
    return Base.unsafe_convert(Ptr{T}, w.data)
end
"""
    getindex(w::Wrapper, i::Integer)

Load and return the `i`-th element of `w` through its raw pointer.

Throws a `BoundsError` unless `0 < i ≤ length(w)`; the check is wrapped in
`@boundscheck`, so callers may elide it with `@inbounds`.
"""
@inline function Base.getindex(w::Wrapper, i::Integer)
    @boundscheck begin
        if i ≤ 0 || i > length(w)
            throw(BoundsError(w, i))
        end
    end
    # `w` is preserved for the duration of the raw-pointer load.
    return GC.@preserve w unsafe_load(pointer(w), i)
end
"""
    setindex!(w::Wrapper{T}, v, i::Integer) where {T}

Store `v` as the `i`-th element of `w` through its raw pointer and return `v`.

Throws a `BoundsError` unless `0 < i ≤ length(w)`; the check is wrapped in
`@boundscheck`, so callers may elide it with `@inbounds`.
"""
@inline function Base.setindex!(w::Wrapper{T}, v, i::Integer) where {T}
    # The first argument was previously named `pw` while the body referred to
    # `w`, which made every call fail with an `UndefVarError`.
    @boundscheck (0 < i ≤ length(w)) || throw(BoundsError(w, i))
    # `w` is preserved for the duration of the raw-pointer store.
    GC.@preserve w begin
        unsafe_store!(pointer(w), v, i)
    end
    return v
end
"""
    mysum(x)

Add up the elements of `x` with an explicit `@inbounds @simd` loop over
`eachindex(x)`, starting from `zero(eltype(x))`, and return the total.
"""
function mysum(x)
    total = zero(eltype(x))
    @inbounds @simd for idx in eachindex(x)
        total += x[idx]
    end
    return total
end
By setting `JULIA_LLVM_ARGS` to `--pass-remarks-analysis=loop-vectorize --pass-remarks-missed=loop-vectorize --pass-remarks=loop-vectorize`, I get:
remark: simdloop.jl:75:0: loop not vectorized: instruction cannot be vectorized
remark: simdloop.jl:75:0: loop not vectorized
Removing the `GC.@preserve` yields:
remark: simdloop.jl:75:0: vectorized loop (vectorization width: 4, interleaved count: 4)
I believe the `GC.@preserve` is necessary, so it'd be nice if it didn't carry a potential performance penalty.
Metadata
Metadata
Assignees
Labels
GC (Garbage collector) · compiler:simd (instruction-level vectorization) · performance (Must go faster)