diff --git a/src/differentiation/compute_jacobian_ad.jl b/src/differentiation/compute_jacobian_ad.jl index e9d716e9..9e715892 100644 --- a/src/differentiation/compute_jacobian_ad.jl +++ b/src/differentiation/compute_jacobian_ad.jl @@ -294,6 +294,8 @@ function forwarddiff_color_jacobian!(J::AbstractMatrix{<:Number}, sparsity = jac_cache.sparsity chunksize = jac_cache.chunksize color_i = 1 + adaptedcolorvec = adapt(__parameterless_type(typeof(dx)),colorvec) + maxcolor = maximum(colorvec) if J isa AbstractSparseMatrix @@ -357,9 +359,17 @@ function forwarddiff_color_jacobian!(J::AbstractMatrix{<:Number}, += means requires a zero'd out start =# if J isa AbstractSparseMatrix - @. setindex!((J.nzval,),getindex((J.nzval,),rows_index) + (getindex((colorvec,),cols_index) == color_i) * getindex((vecdx,),rows_index),rows_index) + if J isa SparseMatrixCSC + @. void_setindex!(Ref(nonzeros(J)),getindex(Ref(nonzeros(J)),rows_index) + (getindex(Ref(adaptedcolorvec),cols_index) == color_i) * getindex(Ref(vecdx),rows_index),rows_index) + else + nzval = @view nonzeros(J)[rows_index] + cv = @view adaptedcolorvec[cols_index] + vdx = @view dx[rows_index] + tmp = cv .== color_i + nzval .+= tmp .* vdx + end else - @. setindex!((J,),getindex((J,),rows_index, cols_index) + (getindex((colorvec,),cols_index) == color_i) * getindex((vecdx,),rows_index),rows_index, cols_index) + @. void_setindex!(Ref(J),getindex(Ref(J),rows_index, cols_index) + (getindex(Ref(colorvec),cols_index) == color_i) * getindex(Ref(vecdx),rows_index),rows_index, cols_index) end end color_i += 1 diff --git a/test/test_gpu_ad.jl b/test/test_gpu_ad.jl index 3b041a38..f10d4eaa 100644 --- a/test/test_gpu_ad.jl +++ b/test/test_gpu_ad.jl @@ -1,18 +1,25 @@ using SparseDiffTools, CUDA, Test, LinearAlgebra using ArrayInterface: allowed_getindex, allowed_setindex! +using SparseArrays + function f(dx,x) dx[2:end-1] = x[1:end-2] - 2x[2:end-1] + x[3:end] allowed_setindex!(dx,-2allowed_getindex(x,1) + allowed_getindex(x,2),1) allowed_setindex!(dx,-2allowed_getindex(x,30) + allowed_getindex(x,29),30) nothing end - +x = rand(4) _J1 = similar(rand(30,30)) _denseJ1 = cu(collect(_J1)) x = cu(rand(30)) CUDA.allowscalar(false) -forwarddiff_color_jacobian!(_denseJ1, f, x) -@test_broken forwarddiff_color_jacobian!(_denseJ1, f, x, sparsity = _J1) isa Nothing -@test_broken forwarddiff_color_jacobian!(_denseJ1, f, x, colorvec = repeat(1:3,10), sparsity = _J1) isa Nothing +_J2 = sparse(forwarddiff_color_jacobian!(_denseJ1, f, x)) +out = copy(_J2) +forwarddiff_color_jacobian!(out, f, x, colorvec = repeat(1:3,10), sparsity = _J2) + +@test_broken forwarddiff_color_jacobian!(_denseJ1, f, x, sparsity = cu(_J1)) isa Nothing +@test_broken forwarddiff_color_jacobian!(_denseJ1, f, x, colorvec = repeat(1:3,10), sparsity = cu(_J1)) isa Nothing _Jt = similar(Tridiagonal(_J1)) @test_broken forwarddiff_color_jacobian!(_denseJ1, f, x, colorvec = repeat(1:3,10), sparsity = _Jt) isa Nothing +_Jt2 = similar(Tridiagonal(cu(_J1))) +@test_broken forwarddiff_color_jacobian!(_denseJ1, f, x, colorvec = repeat(1:3,10), sparsity = _Jt2) isa Nothing