allow for immutable DiffResults to support totally stack-allocated computations (#9)

jrevels · web-flow · commit 1f15e9c3347b · 2017-05-18T11:23:43.000-04:00
diff --git a/REQUIRE b/REQUIRE
@@ -1 +1,2 @@
 julia 0.6-
+StaticArrays 0.5.0
diff --git a/src/DiffBase.jl b/src/DiffBase.jl
@@ -2,6 +2,8 @@ __precompile__()
 
 module DiffBase
 
+using StaticArrays
+
 include("results.jl")
 include("testfuncs.jl")
 include("rules.jl")
diff --git a/src/results.jl b/src/results.jl
@@ -1,57 +1,78 @@
-##############
-# DiffResult #
-##############
+#########
+# Types #
+#########
 
-mutable struct DiffResult{O,V,D<:Tuple}
+abstract type DiffResult{O,V,D<:Tuple} end
+
+struct ImmutableDiffResult{O,V,D<:Tuple} <: DiffResult{O,V,D}
     value::V
     derivs::D # ith element = ith-order derivative
-    function DiffResult{O,V,D}(value::V, derivs::NTuple{O,Any}) where {O,V,D}
-        return new{O,V,D}(value, derivs)
+    function ImmutableDiffResult(value::V, derivs::NTuple{O,Any}) where {O,V}
+        return new{O,V,typeof(derivs)}(value, derivs)
     end
 end
 
-"""
-    DiffResult(value, derivs::Tuple)
+mutable struct MutableDiffResult{O,V,D<:Tuple} <: DiffResult{O,V,D}
+    value::V
+    derivs::D # ith element = ith-order derivative
+    function MutableDiffResult(value::V, derivs::NTuple{O,Any}) where {O,V}
+        return new{O,V,typeof(derivs)}(value, derivs)
+    end
+end
 
-Return a `DiffResult` instance where values will be stored in the provided `value` storage
-and derivatives will be stored in the provided `derivs` storage.
+################
+# Constructors #
+################
 
-Note that the arguments can be `Number`s or `AbstractArray`s, depending on the dimensionality
-of your target function.
 """
-DiffResult{V,O}(value::V, derivs::NTuple{O,Any}) = DiffResult{O,V,typeof(derivs)}(value, derivs)
+    DiffResult(value::Union{Number,AbstractArray}, derivs::Tuple{Vararg{Number}})
+    DiffResult(value::Union{Number,AbstractArray}, derivs::Tuple{Vararg{AbstractArray}})
 
-"""
-    DiffResult(value, derivs...)
+Return `r::DiffResult`, with output value storage provided by `value` and output derivative
+storage provided by `derivs`.
+
+In reality, `DiffResult` is an abstract supertype of two concrete types, `MutableDiffResult`
+and `ImmutableDiffResult`. If `value` and all `derivs` are `Number`s or `SArray`s, then `r`
+will be immutable (i.e. `r::ImmutableDiffResult`). Otherwise, `r` will be mutable
+(i.e. `r::MutableDiffResult`).
 
-Equivalent to `DiffResult(value, derivs::Tuple)`, where `derivs...` is the splatted form of `derivs::Tuple`.
+Note that `derivs` can be provided in splatted form, i.e. `DiffResult(value, derivs...)`.
 """
-DiffResult(value, derivs...) = DiffResult(value, derivs)
+DiffResult
+
+DiffResult(value::Number, derivs::Tuple{Vararg{Number}}) = ImmutableDiffResult(value, derivs)
+DiffResult(value::Number, derivs::Tuple{Vararg{SArray}}) = ImmutableDiffResult(value, derivs)
+DiffResult(value::SArray, derivs::Tuple{Vararg{SArray}}) = ImmutableDiffResult(value, derivs)
+DiffResult(value::Number, derivs::Tuple{Vararg{AbstractArray}}) = MutableDiffResult(value, derivs)
+DiffResult(value::AbstractArray, derivs::Tuple{Vararg{AbstractArray}}) = MutableDiffResult(value, derivs)
+DiffResult(value::Union{Number,AbstractArray}, derivs::Union{Number,AbstractArray}...) = DiffResult(value, derivs)
 
 """
     GradientResult(x::AbstractArray)
 
-Construct a `DiffResult` that can be used for gradient calculations where `x` is the
-input to the target function.
+Construct a `DiffResult` that can be used for gradient calculations where `x` is the input
+to the target function.
 
 Note that `GradientResult` allocates its own storage; `x` is only used for type and
 shape information. If you want to allocate storage yourself, use the `DiffResult`
 constructor instead.
 """
 GradientResult(x::AbstractArray) = DiffResult(first(x), similar(x))
+GradientResult(x::SArray) = DiffResult(first(x), x)
 
 """
     JacobianResult(x::AbstractArray)
 
-Construct a `DiffResult` that can be used for Jacobian calculations where `x` is the
-input to the target function. This method assumes that the target function's output
-dimension equals its input dimension.
+Construct a `DiffResult` that can be used for Jacobian calculations where `x` is the input
+to the target function. This method assumes that the target function's output dimension
+equals its input dimension.
 
 Note that `JacobianResult` allocates its own storage; `x` is only used for type and
 shape information. If you want to allocate storage yourself, use the `DiffResult`
 constructor instead.
 """
 JacobianResult(x::AbstractArray) = DiffResult(similar(x), similar(x, length(x), length(x)))
+JacobianResult(x::SArray{<:Any,T,<:Any,L}) where {T,L} = DiffResult(x, zeros(SMatrix{L,L,T}))
 
 """
     JacobianResult(y::AbstractArray, x::AbstractArray)
@@ -64,6 +85,7 @@ Like the single argument version, `y` and `x` are only used for type and
 shape information and are not stored in the returned `DiffResult`.
 """
 JacobianResult(y::AbstractArray, x::AbstractArray) = DiffResult(similar(y), similar(y, length(y), length(x)))
+JacobianResult(y::SArray{<:Any,<:Any,<:Any,Y}, x::SArray{<:Any,T,<:Any,X}) where {T,Y,X} = DiffResult(y, zeros(SMatrix{Y,X,T}))
 
 """
     HessianResult(x::AbstractArray)
@@ -76,9 +98,30 @@ shape information. If you want to allocate storage yourself, use the `DiffResult
 constructor instead.
 """
 HessianResult(x::AbstractArray) = DiffResult(first(x), similar(x), similar(x, length(x), length(x)))
+HessianResult(x::SArray{<:Any,T,<:Any,L}) where {T,L} = DiffResult(first(x), x, zeros(SMatrix{L,L,T}))
+
+#############
+# Interface #
+#############
+
+@generated function tuple_eltype(x::Tuple, ::Type{Val{i}}) where {i}
+    return quote
+        $(Expr(:meta, :inline))
+        return $(x.parameters[i])
+    end
+end
+
+@generated function tuple_setindex(x::NTuple{N,Any}, y, ::Type{Val{i}}) where {N,i}
+    new_tuple = Expr(:tuple, [ifelse(i == n, :y, :(x[$n])) for n in 1:N]...)
+    return quote
+        $(Expr(:meta, :inline))
+        return $new_tuple
+    end
+end
 
 Base.eltype(r::DiffResult) = eltype(typeof(r))
-Base.eltype{O,V,D}(::Type{DiffResult{O,V,D}}) = eltype(V)
+
+Base.eltype(::Type{D}) where {O,V,D<:DiffResult{O,V}} = eltype(V)
 
 Base.:(==)(a::DiffResult, b::DiffResult) = a.value == b.value && a.derivs == b.derivs
 
@@ -92,26 +135,35 @@ Base.copy(r::DiffResult) = DiffResult(copy(r.value), map(copy, r.derivs))
 
 Return the primal value stored in `r`.
 
-Note that this method returns a reference, not a copy. Thus, if `value(r)` is mutable,
-mutating `value(r)` will mutate `r`.
+Note that this method returns a reference, not a copy.
 """
 value(r::DiffResult) = r.value
 
 """
     value!(r::DiffResult, x)
 
-Copy `x` into `r`'s value storage, such that `value(r) == x`.
+Return `s::DiffResult` with the same data as `r`, except that `value(s) == x`.
+
+This function may or may not mutate `r`. If `r::ImmutableDiffResult`, a totally new
+instance will be created and returned, whereas if `r::MutableDiffResult`, then `r` will be
+mutated in-place and returned. Thus, this function should be called as `r = value!(r, x)`.
 """
-value!(r::DiffResult, x::Number) = (r.value = x; return r)
-value!(r::DiffResult, x::AbstractArray) = (copy!(value(r), x); return r)
+value!(r::MutableDiffResult, x::Number) = (r.value = x; return r)
+value!(r::MutableDiffResult, x::AbstractArray) = (copy!(value(r), x); return r)
+value!(r::ImmutableDiffResult, x::Union{Number,SArray}) = ImmutableDiffResult(x, r.derivs)
+value!(r::ImmutableDiffResult, x::AbstractArray) = ImmutableDiffResult(typeof(value(r))(x), r.derivs)
 
 """
     value!(f, r::DiffResult, x)
 
-Like `value!(r::DiffResult, x)`, but with `f` applied to each element, such that `value(r) == map(f, x)`.
+Equivalent to `value!(r::DiffResult, map(f, x))`, but without the implied temporary
+allocation (when possible).
 """
-value!(f, r::DiffResult, x::Number) = (r.value = f(x); return r)
-value!(f, r::DiffResult, x::AbstractArray) = (map!(f, value(r), x); return r)
+value!(f, r::MutableDiffResult, x::Number) = (r.value = f(x); return r)
+value!(f, r::MutableDiffResult, x::AbstractArray) = (map!(f, value(r), x); return r)
+value!(f, r::ImmutableDiffResult, x::Number) = value!(r, f(x))
+value!(f, r::ImmutableDiffResult, x::SArray) = value!(r, map(f, x))
+value!(f, r::ImmutableDiffResult, x::AbstractArray) = value!(r, map(f, typeof(value(r))(x)))
 
 # derivative/derivative! #
 #------------------------#
@@ -121,122 +173,159 @@ value!(f, r::DiffResult, x::AbstractArray) = (map!(f, value(r), x); return r)
 
 Return the `ith` derivative stored in `r`, defaulting to the first derivative.
 
-Note that this method returns a reference, not a copy. Thus, if `derivative(r)` is mutable,
-mutating `derivative(r)` will mutate `r`.
+Note that this method returns a reference, not a copy.
 """
-derivative{i}(r::DiffResult, ::Type{Val{i}} = Val{1}) = r.derivs[i]
+derivative(r::DiffResult, ::Type{Val{i}} = Val{1}) where {i} = r.derivs[i]
 
 """
     derivative!(r::DiffResult, x, ::Type{Val{i}} = Val{1})
 
-Copy `x` into `r`'s `ith` derivative storage, such that `derivative(r, Val{i}) == x`.
+Return `s::DiffResult` with the same data as `r`, except `derivative(s, Val{i}) == x`.
+
+This function may or may not mutate `r`. If `r::ImmutableDiffResult`, a totally new
+instance will be created and returned, whereas if `r::MutableDiffResult`, then `r` will be
+mutated in-place and returned. Thus, this function should be called as
+`r = derivative!(r, x, Val{i})`.
 """
-@generated function derivative!{O,i}(r::DiffResult{O}, x::Number, ::Type{Val{i}} = Val{1})
-    newderivs = Expr(:tuple, [i == n ? :(x) : :(derivative(r, Val{$n})) for n in 1:O]...)
-    return quote
-        r.derivs = $newderivs
-        return r
-    end
+function derivative!(r::MutableDiffResult, x::Number, ::Type{Val{i}} = Val{1}) where {i}
+    r.derivs = tuple_setindex(r.derivs, x, Val{i})
+    return r
 end
 
-function derivative!{i}(r::DiffResult, x::AbstractArray, ::Type{Val{i}} = Val{1})
+function derivative!(r::MutableDiffResult, x::AbstractArray, ::Type{Val{i}} = Val{1}) where {i}
     copy!(derivative(r, Val{i}), x)
     return r
 end
 
+function derivative!(r::ImmutableDiffResult, x::Union{Number,SArray}, ::Type{Val{i}} = Val{1}) where {i}
+    return ImmutableDiffResult(value(r), tuple_setindex(r.derivs, x, Val{i}))
+end
+
+function derivative!(r::ImmutableDiffResult, x::AbstractArray, ::Type{Val{i}} = Val{1}) where {i}
+    T = tuple_eltype(r.derivs, Val{i})
+    return ImmutableDiffResult(value(r), tuple_setindex(r.derivs, T(x), Val{i}))
+end
+
 """
     derivative!(f, r::DiffResult, x, ::Type{Val{i}} = Val{1})
 
-Like `derivative!(r::DiffResult, x, Val{i})`, but with `f` applied to each element,
-such that `derivative(r, Val{i}) == map(f, x)`.
+Equivalent to `derivative!(r::DiffResult, map(f, x), Val{i})`, but without the implied
+temporary allocation (when possible).
 """
-@generated function derivative!{O,i}(f, r::DiffResult{O}, x::Number, ::Type{Val{i}} = Val{1})
-    newderivs = Expr(:tuple, [i == n ? :(f(x)) : :(derivative(r, Val{$n})) for n in 1:O]...)
-    return quote
-        r.derivs = $newderivs
-        return r
-    end
+function derivative!(f, r::MutableDiffResult, x::Number, ::Type{Val{i}} = Val{1}) where {i}
+    r.derivs = tuple_setindex(r.derivs, f(x), Val{i})
+    return r
 end
 
-function derivative!{i}(f, r::DiffResult, x::AbstractArray, ::Type{Val{i}} = Val{1})
+function derivative!(f, r::MutableDiffResult, x::AbstractArray, ::Type{Val{i}} = Val{1}) where {i}
     map!(f, derivative(r, Val{i}), x)
     return r
 end
 
+function derivative!(f, r::ImmutableDiffResult, x::Number, ::Type{Val{i}} = Val{1}) where {i}
+    return derivative!(r, f(x), Val{i})
+end
+
+function derivative!(f, r::ImmutableDiffResult, x::SArray, ::Type{Val{i}} = Val{1}) where {i}
+    return derivative!(r, map(f, x), Val{i})
+end
+
+function derivative!(f, r::ImmutableDiffResult, x::AbstractArray, ::Type{Val{i}} = Val{1}) where {i}
+    T = tuple_eltype(r.derivs, Val{i})
+    return derivative!(r, map(f, T(x)), Val{i})
+end
+
 # special-cased methods #
 #-----------------------#
 
 """
     gradient(r::DiffResult)
 
-Return the gradient stored in `r` (equivalent to `derivative(r)`).
+Return the gradient stored in `r`.
 
-Note that this method returns a reference, not a copy. Thus, if `gradient(r)` is mutable,
-mutating `gradient(r)` will mutate `r`.
+Equivalent to `derivative(r, Val{1})`; see `derivative` docs for aliasing behavior.
 """
 gradient(r::DiffResult) = derivative(r)
 
 """
     gradient!(r::DiffResult, x)
 
-Copy `x` into `r`'s gradient storage, such that `gradient(r) == x`.
+Return `s::DiffResult` with the same data as `r`, except `gradient(s) == x`.
+
+Equivalent to `derivative!(r, x, Val{1})`; see `derivative!` docs for aliasing behavior.
 """
 gradient!(r::DiffResult, x) = derivative!(r, x)
 
 """
     gradient!(f, r::DiffResult, x)
 
-Like `gradient!(r::DiffResult, x)`, but with `f` applied to each element,
-such that `gradient(r) == map(f, x)`.
+Equivalent to `gradient!(r::DiffResult, map(f, x))`, but without the implied temporary
+allocation (when possible).
+
+Equivalent to `derivative!(f, r, x, Val{1})`; see `derivative!` docs for aliasing behavior.
 """
 gradient!(f, r::DiffResult, x) = derivative!(f, r, x)
 
 """
     jacobian(r::DiffResult)
 
-Return the Jacobian stored in `r` (equivalent to `derivative(r)`).
+Return the Jacobian stored in `r`.
 
-Note that this method returns a reference, not a copy. Thus, if `jacobian(r)` is mutable,
-mutating `jacobian(r)` will mutate `r`.
+Equivalent to `derivative(r, Val{1})`; see `derivative` docs for aliasing behavior.
 """
 jacobian(r::DiffResult) = derivative(r)
 
 """
     jacobian!(r::DiffResult, x)
 
-Copy `x` into `r`'s Jacobian storage, such that `jacobian(r) == x`.
+Return `s::DiffResult` with the same data as `r`, except `jacobian(s) == x`.
+
+Equivalent to `derivative!(r, x, Val{1})`; see `derivative!` docs for aliasing behavior.
 """
 jacobian!(r::DiffResult, x) = derivative!(r, x)
 
 """
     jacobian!(f, r::DiffResult, x)
 
-Like `jacobian!(r::DiffResult, x)`, but with `f` applied to each element,
-such that `jacobian(r) == map(f, x)`.
+Equivalent to `jacobian!(r::DiffResult, map(f, x))`, but without the implied temporary
+allocation (when possible).
+
+Equivalent to `derivative!(f, r, x, Val{1})`; see `derivative!` docs for aliasing behavior.
 """
 jacobian!(f, r::DiffResult, x) = derivative!(f, r, x)
 
 """
     hessian(r::DiffResult)
 
-Return the Hessian stored in `r` (equivalent to `derivative(r, Val{2})`).
+Return the Hessian stored in `r`.
 
-Note that this method returns a reference, not a copy. Thus, if `hessian(r)` is mutable,
-mutating `hessian(r)` will mutate `r`.
+Equivalent to `derivative(r, Val{2})`; see `derivative` docs for aliasing behavior.
 """
 hessian(r::DiffResult) = derivative(r, Val{2})
 
 """
     hessian!(r::DiffResult, x)
 
-Copy `x` into `r`'s Hessian storage, such that `hessian(r) == x`.
+Return `s::DiffResult` with the same data as `r`, except `hessian(s) == x`.
+
+Equivalent to `derivative!(r, x, Val{2})`; see `derivative!` docs for aliasing behavior.
 """
 hessian!(r::DiffResult, x) = derivative!(r, x, Val{2})
 
 """
     hessian!(f, r::DiffResult, x)
 
-Like `hessian!(r::DiffResult, x)`, but with `f` applied to each element,
-such that `hessian(r) == map(f, x)`.
+Equivalent to `hessian!(r::DiffResult, map(f, x))`, but without the implied temporary
+allocation (when possible).
+
+Equivalent to `derivative!(f, r, x, Val{2})`; see `derivative!` docs for aliasing behavior.
 """
 hessian!(f, r::DiffResult, x) = derivative!(f, r, x, Val{2})
+
+###################
+# Pretty Printing #
+###################
+
+Base.show(io::IO, r::ImmutableDiffResult) = print(io, "ImmutableDiffResult($(r.value), $(r.derivs))")
+
+Base.show(io::IO, r::MutableDiffResult) = print(io, "MutableDiffResult($(r.value), $(r.derivs))")
diff --git a/test/ResultTests.jl b/test/ResultTests.jl