diff --git a/HISTORY.md b/HISTORY.md
index f43f543c3..52616315d 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,10 @@
 # DynamicPPL Changelog
 
+## 0.35.8
+
+Added the `DynamicPPL.TestUtils.AD.run_ad` function to test the correctness and/or benchmark the performance of an automatic differentiation backend on DynamicPPL models.
+Please see [the docstring](https://turinglang.org/DynamicPPL.jl/api/#DynamicPPL.TestUtils.AD.run_ad) for more information.
+
 ## 0.35.7
 
 `check_model_and_trace` now errors if any NaN's are encountered when evaluating the model.
diff --git a/Project.toml b/Project.toml
index 5790eaeb1..6c72e55d9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "DynamicPPL"
 uuid = "366bfd00-2699-11ea-058f-f148b4cae6d8"
-version = "0.35.7"
+version = "0.35.8"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -10,6 +10,7 @@ Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
 BangBang = "198e06fe-97b7-11e9-32a5-e1d131e6ad66"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
 DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
@@ -22,6 +23,7 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [weakdeps]
@@ -49,6 +51,7 @@ Accessors = "0.1"
 BangBang = "0.4.1"
 Bijectors = "0.13.18, 0.14, 0.15"
 ChainRulesCore = "1"
+Chairmarks = "1.3.1"
 Compat = "4"
 ConstructionBase = "1.5.4"
 DifferentiationInterface = "0.6.41"
@@ -67,5 +70,6 @@ Mooncake = "0.4.95"
 OrderedCollections = "1"
 Random = "1.6"
 Requires = "1"
+Statistics = "1"
 Test = "1.6"
 julia = "1.10"
diff --git a/docs/src/api.md b/docs/src/api.md
index 9c8249c97..7af4074e2 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -205,7 +205,16 @@ values_as_in_model
 NamedDist
 ```
 
-## Testing Utilities
+## AD testing and benchmarking utilities
+
+To test and/or benchmark the performance of an AD backend on a model, DynamicPPL provides the following utilities:
+
+```@docs
+DynamicPPL.TestUtils.AD.run_ad
+DynamicPPL.TestUtils.AD.ADResult
+```
+
+## Demo models
 
 DynamicPPL provides several demo models and helpers for testing samplers in the `DynamicPPL.TestUtils` submodule.
 
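As a quick illustration of the utilities documented above, a benchmark run might look roughly like the sketch below (illustrative only; it assumes `ForwardDiff` and `ReverseDiff` are installed and uses one of the demo models from `DynamicPPL.TestUtils`):

```julia
using DynamicPPL, ADTypes
import ForwardDiff, ReverseDiff  # AD backend packages must be loaded before use

model = DynamicPPL.TestUtils.DEMO_MODELS[1]

# test=true (the default) checks the ReverseDiff gradient against the ForwardDiff
# reference; benchmark=true additionally times the gradient relative to the primal.
result = DynamicPPL.TestUtils.AD.run_ad(model, AutoReverseDiff(); benchmark=true)
result.time_vs_primal  # median gradient time divided by median primal evaluation time
```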
diff --git a/src/DynamicPPL.jl b/src/DynamicPPL.jl
index 50fe0edc7..6e26d9a97 100644
--- a/src/DynamicPPL.jl
+++ b/src/DynamicPPL.jl
@@ -175,7 +175,6 @@ include("context_implementations.jl")
 include("compiler.jl")
 include("pointwise_logdensities.jl")
 include("submodel_macro.jl")
-include("test_utils.jl")
 include("transforming.jl")
 include("logdensityfunction.jl")
 include("model_utils.jl")
@@ -184,6 +183,7 @@ include("values_as_in_model.jl")
 include("debug_utils.jl")
 using .DebugUtils
 
+include("test_utils.jl")
 include("experimental.jl")
 include("deprecated.jl")
 
diff --git a/src/test_utils.jl b/src/test_utils.jl
index c7d12c927..65079f023 100644
--- a/src/test_utils.jl
+++ b/src/test_utils.jl
@@ -18,5 +18,6 @@ include("test_utils/models.jl")
 include("test_utils/contexts.jl")
 include("test_utils/varinfo.jl")
 include("test_utils/sampler.jl")
+include("test_utils/ad.jl")
 
 end
diff --git a/src/test_utils/ad.jl b/src/test_utils/ad.jl
new file mode 100644
index 000000000..06c76df5e
--- /dev/null
+++ b/src/test_utils/ad.jl
@@ -0,0 +1,209 @@
+module AD
+
+using ADTypes: AbstractADType, AutoForwardDiff
+using Chairmarks: @be
+import DifferentiationInterface as DI
+using DocStringExtensions
+using DynamicPPL: Model, LogDensityFunction, VarInfo, AbstractVarInfo
+using LogDensityProblems: logdensity, logdensity_and_gradient
+using Random: Random, Xoshiro
+using Statistics: median
+using Test: @test
+
+export ADResult, run_ad
+
+# This function is needed to work around the fact that different backends can
+# return different AbstractArrays for the gradient. See
+# https://github.com/JuliaDiff/DifferentiationInterface.jl/issues/754 for more
+# context.
+_to_vec_f64(x::AbstractArray) = x isa Vector{Float64} ? x : collect(Float64, x)
+
+"""
+    REFERENCE_ADTYPE
+
+Reference AD backend to use for comparison. In this case, ForwardDiff.jl, since
+it's the default AD backend used in Turing.jl.
+"""
+const REFERENCE_ADTYPE = AutoForwardDiff()
+
+"""
+    ADResult
+
+Data structure to store the results of the AD correctness test and/or benchmark.
+"""
+struct ADResult
+    "The DynamicPPL model that was tested"
+    model::Model
+    "The VarInfo that was used"
+    varinfo::AbstractVarInfo
+    "The values at which the model was evaluated"
+    params::Vector{<:Real}
+    "The AD backend that was tested"
+    adtype::AbstractADType
+    "The absolute tolerance for the value of logp"
+    value_atol::Real
+    "The absolute tolerance for the gradient of logp"
+    grad_atol::Real
+    "The expected value of logp"
+    value_expected::Union{Nothing,Float64}
+    "The expected gradient of logp"
+    grad_expected::Union{Nothing,Vector{Float64}}
+    "The value of logp (calculated using `adtype`)"
+    value_actual::Union{Nothing,Real}
+    "The gradient of logp (calculated using `adtype`)"
+    grad_actual::Union{Nothing,Vector{Float64}}
+    "If benchmarking was requested, the time taken by the AD backend to calculate the gradient of logp, divided by the time taken to evaluate logp itself"
+    time_vs_primal::Union{Nothing,Float64}
+end
+
+"""
+    run_ad(
+        model::Model,
+        adtype::ADTypes.AbstractADType;
+        test=true,
+        benchmark=false,
+        value_atol=1e-6,
+        grad_atol=1e-6,
+        varinfo::AbstractVarInfo=VarInfo(model),
+        params::Vector{<:Real}=varinfo[:],
+        reference_adtype::ADTypes.AbstractADType=REFERENCE_ADTYPE,
+        expected_value_and_grad::Union{Nothing,Tuple{Real,Vector{<:Real}}}=nothing,
+        verbose=true,
+    )::ADResult
+
+Test the correctness and/or benchmark the AD backend `adtype` for the model
+`model`.
+
+Whether to test and benchmark is controlled by the `test` and `benchmark`
+keyword arguments.
+By default, `test` is `true` and `benchmark` is `false`.
+
+Returns an [`ADResult`](@ref) object, which contains the results of the
+test and/or benchmark.
+
+Note that to run AD successfully you will need to import the AD backend itself.
+For example, to test with `AutoReverseDiff()` you will need to run `import
+ReverseDiff`.
+
+There are two positional arguments, which absolutely must be provided:
+
+1. `model` - The model being tested.
+2. `adtype` - The AD backend being tested.
+
+Everything else is optional, and can be categorised into several groups:
+
+1. _How to specify the VarInfo._
+
+   DynamicPPL contains several different types of VarInfo objects which change
+   the way model evaluation occurs. If you want to use a specific type of
+   VarInfo, pass it as the `varinfo` argument. Otherwise, it will default to
+   using a `TypedVarInfo` generated from the model.
+
+2. _How to specify the parameters._
+
+   For maximum control over this, generate a vector of parameters yourself and
+   pass this as the `params` argument. If you don't specify this, it will be
+   taken from the contents of the VarInfo.
+
+   Note that if the VarInfo is not specified (and thus automatically generated),
+   the parameters in it will have been sampled from the prior of the model. If
+   you want to seed the parameter generation, the easiest way is to pass an
+   `rng` argument to the VarInfo constructor (i.e. do `VarInfo(rng, model)`).
+
+   Finally, note that these only reflect the parameters used for _evaluating_
+   the gradient. If you also want to control the parameters used for
+   _preparing_ the gradient, then you need to manually set these parameters in
+   the VarInfo object, for example using `vi = DynamicPPL.unflatten(vi,
+   prep_params)`. You could then evaluate the gradient at a different set of
+   parameters using the `params` keyword argument.
+
+3. _How to specify the results to compare against._ (Only if `test=true`.)
+
+   Once logp and its gradient have been calculated with the specified `adtype`,
+   they must be tested for correctness.
+
+   This can be done either by specifying `reference_adtype`, in which case logp
+   and its gradient will also be calculated with this reference in order to
+   obtain the ground truth; or by using `expected_value_and_grad`, which is a
+   tuple of `(logp, gradient)` that the calculated values must match. The
+   latter is useful if you are testing multiple AD backends and want to avoid
+   recalculating the ground truth multiple times.
+
+   The default reference backend is ForwardDiff, so if neither of these
+   parameters is specified, ForwardDiff will be used to calculate the ground
+   truth.
+
+4. _How to specify the tolerances._ (Only if `test=true`.)
+
+   The tolerances for the value and gradient can be set using `value_atol` and
+   `grad_atol`. These default to 1e-6.
+
+5. _Whether to output extra logging information._
+
+   By default, this function prints messages when it runs. To silence it, set
+   `verbose=false`.
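+
+# Examples
+
+For illustration, a minimal correctness test might look like the following
+sketch (it assumes that `Distributions`, `ForwardDiff`, and `ReverseDiff` are
+available in the environment):
+
+```julia
+using DynamicPPL, ADTypes, Distributions
+import ForwardDiff, ReverseDiff  # AD backend packages must be loaded
+
+@model function demo()
+    x ~ Normal()
+    y ~ Normal(x, 1.0)
+end
+
+# Compares AutoReverseDiff() against the ForwardDiff reference and errors (via
+# `@test`) if the value or gradient disagree beyond the default tolerances.
+result = DynamicPPL.TestUtils.AD.run_ad(demo(), AutoReverseDiff())
+# `result` is an `ADResult` containing the values and gradients that were computed.
+```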
+"""
+function run_ad(
+    model::Model,
+    adtype::AbstractADType;
+    test=true,
+    benchmark=false,
+    value_atol=1e-6,
+    grad_atol=1e-6,
+    varinfo::AbstractVarInfo=VarInfo(model),
+    params::Vector{<:Real}=varinfo[:],
+    reference_adtype::AbstractADType=REFERENCE_ADTYPE,
+    expected_value_and_grad::Union{Nothing,Tuple{Real,Vector{<:Real}}}=nothing,
+    verbose=true,
+)::ADResult
+    verbose && @info "Running AD on $(model.f) with $(adtype)\n"
+    # `map(identity, ...)` narrows the element type of `params` to a concrete
+    # type where possible (e.g. Vector{Any} -> Vector{Float64}).
+    params = map(identity, params)
+    verbose && println("       params : $(params)")
+    ldf = LogDensityFunction(model, varinfo; adtype=adtype)
+
+    value, grad = logdensity_and_gradient(ldf, params)
+    grad = _to_vec_f64(grad)
+    verbose && println("       actual : $((value, grad))")
+
+    if test
+        # Calculate ground truth to compare against
+        value_true, grad_true = if expected_value_and_grad === nothing
+            ldf_reference = LogDensityFunction(model, varinfo; adtype=reference_adtype)
+            logdensity_and_gradient(ldf_reference, params)
+        else
+            expected_value_and_grad
+        end
+        verbose && println("     expected : $((value_true, grad_true))")
+        grad_true = _to_vec_f64(grad_true)
+        # Then compare
+        @test isapprox(value, value_true; atol=value_atol)
+        @test isapprox(grad, grad_true; atol=grad_atol)
+    else
+        value_true = nothing
+        grad_true = nothing
+    end
+
+    time_vs_primal = if benchmark
+        primal_benchmark = @be (ldf, params) logdensity(_[1], _[2])
+        grad_benchmark = @be (ldf, params) logdensity_and_gradient(_[1], _[2])
+        t = median(grad_benchmark).time / median(primal_benchmark).time
+        verbose && println("grad / primal : $(t)")
+        t
+    else
+        nothing
+    end
+
+    return ADResult(
+        model,
+        varinfo,
+        params,
+        adtype,
+        value_atol,
+        grad_atol,
+        value_true,
+        grad_true,
+        value,
+        grad,
+        time_vs_primal,
+    )
+end
+
+end # module DynamicPPL.TestUtils.AD
diff --git a/test/ad.jl b/test/ad.jl
index a4f3dbfa7..33d581228 100644
--- a/test/ad.jl
+++ b/test/ad.jl
@@ -56,10 +56,12 @@ using DynamicPPL: LogDensityFunction
                     ref_ldf, adtype
                 )
             else
-                ldf = DynamicPPL.LogDensityFunction(ref_ldf, adtype)
-                logp, grad = LogDensityProblems.logdensity_and_gradient(ldf, x)
-                @test grad ≈ ref_grad
-                @test logp ≈ ref_logp
+                DynamicPPL.TestUtils.AD.run_ad(
+                    m,
+                    adtype;
+                    varinfo=varinfo,
+                    expected_value_and_grad=(ref_logp, ref_grad),
+                )
             end
         end
     end
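
For downstream test suites, one way to avoid recomputing the ForwardDiff reference for every backend is to compute it once and pass it via `expected_value_and_grad`, roughly as sketched below (the backend list is purely illustrative, and each backend package must be imported for its `Auto*` type to be usable):

```julia
using DynamicPPL, ADTypes
using LogDensityProblems: logdensity_and_gradient
import ForwardDiff, ReverseDiff, Mooncake

model = DynamicPPL.TestUtils.DEMO_MODELS[1]
varinfo = DynamicPPL.VarInfo(model)
params = varinfo[:]

# Compute the reference value and gradient once with ForwardDiff...
ref_ldf = DynamicPPL.LogDensityFunction(model, varinfo; adtype=AutoForwardDiff())
ref = logdensity_and_gradient(ref_ldf, params)

# ...then reuse it when testing the other backends.
for adtype in [AutoReverseDiff(), AutoMooncake(; config=nothing)]
    DynamicPPL.TestUtils.AD.run_ad(
        model, adtype; varinfo=varinfo, params=params, expected_value_and_grad=ref
    )
end
```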