From de77c9786871b5906275590bca477d6ca3c0003c Mon Sep 17 00:00:00 2001 From: Collin Wittenstein Date: Fri, 18 Jul 2025 23:36:47 +0200 Subject: [PATCH 1/5] small changes which make it ~1.5x faster --- src/array_partition.jl | 40 ++++++++++++++++++++++++++++-------- src/named_array_partition.jl | 1 + 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/array_partition.jl b/src/array_partition.jl index b0325fe0..978f3427 100644 --- a/src/array_partition.jl +++ b/src/array_partition.jl @@ -364,15 +364,37 @@ end ArrayPartition(f, N) end +# old version +# @inline function Base.copyto!(dest::ArrayPartition, +# bc::Broadcast.Broadcasted{ArrayPartitionStyle{Style}}) where { +# Style, +# } +# N = npartitions(dest, bc) +# @inline function f(i) +# copyto!(dest.x[i], unpack(bc, i)) +# end +# ntuple(f, Val(N)) +# dest +# end + +# new version @inline function Base.copyto!(dest::ArrayPartition, - bc::Broadcast.Broadcasted{ArrayPartitionStyle{Style}}) where { - Style, -} + bc::Broadcast.Broadcasted{ArrayPartitionStyle{Style}}) where {Style} N = npartitions(dest, bc) - @inline function f(i) - copyto!(dest.x[i], unpack(bc, i)) + # Check if this is a simple enough broadcast that we can optimize + if bc.f isa Union{typeof(+), typeof(*), typeof(muladd)} + # @show "hey", bc, N + @inbounds for i in 1:N + # Use materialize! which is more efficient than copyto! for simple broadcasts + Base.Broadcast.materialize!(dest.x[i], unpack(bc, i)) + end + else + # Fall back to original implementation for complex broadcasts + @inline function f(i) + copyto!(dest.x[i], unpack(bc, i)) + end + ntuple(f, Val(N)) end - ntuple(f, Val(N)) dest end @@ -411,8 +433,10 @@ end i) where {Style <: Broadcast.DefaultArrayStyle} Broadcast.Broadcasted{Style}(bc.f, unpack_args(i, bc.args)) end -unpack(x, ::Any) = x -unpack(x::ArrayPartition, i) = x.x[i] + +@inline unpack(x, ::Any) = x +@inline unpack(x::ArrayPartition, i) = x.x[i] + @inline function unpack_args(i, args::Tuple) (unpack(args[1], i), unpack_args(i, Base.tail(args))...) diff --git a/src/named_array_partition.jl b/src/named_array_partition.jl index de8fa91a..75460016 100644 --- a/src/named_array_partition.jl +++ b/src/named_array_partition.jl @@ -135,6 +135,7 @@ end NamedArrayPartition(f, N, getfield(x, :names_to_indices)) end +# TODO: has this also performance problems and can be improved? @inline function Base.copyto!(dest::NamedArrayPartition, bc::Broadcast.Broadcasted{Broadcast.ArrayStyle{NamedArrayPartition}}) N = npartitions(dest, bc) From f0f51c3c0e81733a1a17b4cb962fa999bafbbd36 Mon Sep 17 00:00:00 2001 From: Collin Wittenstein Date: Sat, 19 Jul 2025 17:33:08 +0200 Subject: [PATCH 2/5] new example to readme --- README.md | 21 +++++++++++++++++++++ src/array_partition.jl | 1 - 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index af34685c..cc53aa52 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ the documentation, which contains the unreleased features. ## Example +### VectorOfArray + ```julia using RecursiveArrayTools a = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] @@ -30,11 +32,30 @@ vA = VectorOfArray(a) vB = VectorOfArray(b) vA .* vB # Now all standard array stuff works! +``` +### ArrayPartition + +```julia a = (rand(5), rand(5)) b = (rand(5), rand(5)) pA = ArrayPartition(a) pB = ArrayPartition(b) pA .* pB # Now all standard array stuff works! + + +x0 = rand(3,3) +v0 = rand(3,3) +a0 = rand(3,3) +u0 = ArrayPartition(x0, v0, a0) +u0.x[1] == x0 # true + +u0 .+= 1 +u0.x[2] == v0 # still true + +# do some calculations creating a new partitioned array +unew = u0 * 10 +# easily access the individual components without having to rely on complicated indexing +xnew, vnew, anew = unew.x ``` diff --git a/src/array_partition.jl b/src/array_partition.jl index 978f3427..210b69fc 100644 --- a/src/array_partition.jl +++ b/src/array_partition.jl @@ -383,7 +383,6 @@ end N = npartitions(dest, bc) # Check if this is a simple enough broadcast that we can optimize if bc.f isa Union{typeof(+), typeof(*), typeof(muladd)} - # @show "hey", bc, N @inbounds for i in 1:N # Use materialize! which is more efficient than copyto! for simple broadcasts Base.Broadcast.materialize!(dest.x[i], unpack(bc, i)) From b08a940ec6e3b7c5c444f746a6b3e10979dffb5b Mon Sep 17 00:00:00 2001 From: Collin Wittenstein Date: Mon, 21 Jul 2025 13:40:51 +0200 Subject: [PATCH 3/5] new copyto! function --- src/array_partition.jl | 21 +++------------------ src/named_array_partition.jl | 6 ++---- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/src/array_partition.jl b/src/array_partition.jl index 210b69fc..d693c6b4 100644 --- a/src/array_partition.jl +++ b/src/array_partition.jl @@ -364,28 +364,13 @@ end ArrayPartition(f, N) end -# old version -# @inline function Base.copyto!(dest::ArrayPartition, -# bc::Broadcast.Broadcasted{ArrayPartitionStyle{Style}}) where { -# Style, -# } -# N = npartitions(dest, bc) -# @inline function f(i) -# copyto!(dest.x[i], unpack(bc, i)) -# end -# ntuple(f, Val(N)) -# dest -# end - -# new version @inline function Base.copyto!(dest::ArrayPartition, bc::Broadcast.Broadcasted{ArrayPartitionStyle{Style}}) where {Style} N = npartitions(dest, bc) - # Check if this is a simple enough broadcast that we can optimize - if bc.f isa Union{typeof(+), typeof(*), typeof(muladd)} + # If dest is all the same underlying array type, use for-loop + if all(x isa typeof(first(dest.x)) for x in dest.x) @inbounds for i in 1:N - # Use materialize! which is more efficient than copyto! for simple broadcasts - Base.Broadcast.materialize!(dest.x[i], unpack(bc, i)) + copyto!(dest.x[i], unpack(bc, i)) end else # Fall back to original implementation for complex broadcasts diff --git a/src/named_array_partition.jl b/src/named_array_partition.jl index 75460016..f77fa049 100644 --- a/src/named_array_partition.jl +++ b/src/named_array_partition.jl @@ -135,14 +135,12 @@ end NamedArrayPartition(f, N, getfield(x, :names_to_indices)) end -# TODO: has this also performance problems and can be improved? @inline function Base.copyto!(dest::NamedArrayPartition, bc::Broadcast.Broadcasted{Broadcast.ArrayStyle{NamedArrayPartition}}) N = npartitions(dest, bc) - @inline function f(i) - copyto!(ArrayPartition(dest).x[i], unpack(bc, i)) + @inbounds for i in 1:N + copyto!(dest.x[i], unpack(bc, i)) end - ntuple(f, Val(N)) return dest end From 02ac939b95069dcf6b4208ee325f5e1a1df9944c Mon Sep 17 00:00:00 2001 From: Collin Wittenstein Date: Mon, 21 Jul 2025 13:42:15 +0200 Subject: [PATCH 4/5] deleted spaces --- src/array_partition.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/array_partition.jl b/src/array_partition.jl index d693c6b4..89261f4b 100644 --- a/src/array_partition.jl +++ b/src/array_partition.jl @@ -417,11 +417,9 @@ end i) where {Style <: Broadcast.DefaultArrayStyle} Broadcast.Broadcasted{Style}(bc.f, unpack_args(i, bc.args)) end - @inline unpack(x, ::Any) = x @inline unpack(x::ArrayPartition, i) = x.x[i] - @inline function unpack_args(i, args::Tuple) (unpack(args[1], i), unpack_args(i, Base.tail(args))...) end From 71942b991e598ccb69fe3e72a420c98e0d85329a Mon Sep 17 00:00:00 2001 From: Collin Wittenstein Date: Mon, 21 Jul 2025 13:47:01 +0200 Subject: [PATCH 5/5] changed readme back to original --- README.md | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/README.md b/README.md index cc53aa52..af34685c 100644 --- a/README.md +++ b/README.md @@ -22,8 +22,6 @@ the documentation, which contains the unreleased features. ## Example -### VectorOfArray - ```julia using RecursiveArrayTools a = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] @@ -32,30 +30,11 @@ vA = VectorOfArray(a) vB = VectorOfArray(b) vA .* vB # Now all standard array stuff works! -``` -### ArrayPartition - -```julia a = (rand(5), rand(5)) b = (rand(5), rand(5)) pA = ArrayPartition(a) pB = ArrayPartition(b) pA .* pB # Now all standard array stuff works! - - -x0 = rand(3,3) -v0 = rand(3,3) -a0 = rand(3,3) -u0 = ArrayPartition(x0, v0, a0) -u0.x[1] == x0 # true - -u0 .+= 1 -u0.x[2] == v0 # still true - -# do some calculations creating a new partitioned array -unew = u0 * 10 -# easily access the individual components without having to rely on complicated indexing -xnew, vnew, anew = unew.x ```