Skip to content

Commit dbf0654

Browse files
authored
ENH Add ADVI initialization for continuous models sampled using NUTS.
2 parents ad7dc8a + 59f500c commit dbf0654

15 files changed

+572
-529
lines changed

docs/source/notebooks/BEST.ipynb

+25-58
Large diffs are not rendered by default.

docs/source/notebooks/GLM-hierarchical.ipynb

+44-55
Large diffs are not rendered by default.

docs/source/notebooks/LKJ.ipynb

+36-64
Large diffs are not rendered by default.

docs/source/notebooks/NUTS_scaling_using_ADVI.ipynb

+173-81
Large diffs are not rendered by default.

docs/source/notebooks/cox_model.ipynb

+46-45
Large diffs are not rendered by default.

docs/source/notebooks/marginalized_gaussian_mixture_model.ipynb

+3-6
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,9 @@
292292
}
293293
],
294294
"metadata": {
295+
"anaconda-cloud": {},
295296
"kernelspec": {
296-
"display_name": "Python 3",
297+
"display_name": "Python [default]",
297298
"language": "python",
298299
"name": "python3"
299300
},
@@ -307,11 +308,7 @@
307308
"name": "python",
308309
"nbconvert_exporter": "python",
309310
"pygments_lexer": "ipython3",
310-
"version": "3.5.1"
311-
},
312-
"widgets": {
313-
"state": {},
314-
"version": "1.1.2"
311+
"version": "3.5.2"
315312
}
316313
},
317314
"nbformat": 4,

docs/source/notebooks/pmf-pymc.ipynb

+47-48
Large diffs are not rendered by default.

docs/source/notebooks/posterior_predictive.ipynb

+58-55
Large diffs are not rendered by default.

docs/source/notebooks/stochastic_volatility.ipynb

+31-107
Large diffs are not rendered by default.

pymc3/model.py

+10
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,16 @@ def as_iterargs(data):
703703
else:
704704
return [data]
705705

706+
707+
def all_continuous(vars):
708+
"""Check that vars does not include discrete variables, except ObservedRVs.
709+
"""
710+
vars_ = [var for var in vars if not isinstance(var, pm.model.ObservedRV)]
711+
if any([var.dtype in pm.discrete_types for var in vars_]):
712+
return False
713+
else:
714+
return True
715+
706716
# theano stuff
707717
theano.config.warn.sum_div_dimshuffle_bug = False
708718
theano.config.compute_test_value = 'raise'

pymc3/sampling.py

+81-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import sys
1717
sys.setrecursionlimit(10000)
1818

19-
__all__ = ['sample', 'iter_sample', 'sample_ppc']
19+
__all__ = ['sample', 'iter_sample', 'sample_ppc', 'init_nuts']
2020

2121

2222
def assign_step_methods(model, step=None, methods=(NUTS, HamiltonianMC, Metropolis,
@@ -81,8 +81,9 @@ def assign_step_methods(model, step=None, methods=(NUTS, HamiltonianMC, Metropol
8181
return steps
8282

8383

84-
def sample(draws, step=None, start=None, trace=None, chain=0, njobs=1, tune=None,
85-
progressbar=True, model=None, random_seed=-1):
84+
def sample(draws, step=None, init='advi', n_init=500000, start=None,
85+
trace=None, chain=0, njobs=1, tune=None, progressbar=True,
86+
model=None, random_seed=-1):
8687
"""
8788
Draw a number of samples using the given step method.
8889
Multiple step methods supported via compound step method
@@ -97,6 +98,15 @@ def sample(draws, step=None, start=None, trace=None, chain=0, njobs=1, tune=None
9798
A step function or collection of functions. If no step methods are
9899
specified, or are partially specified, they will be assigned
99100
automatically (defaults to None).
101+
init : str {'advi', 'advi_map', 'map', 'nuts'}
102+
Initialization method to use.
103+
* advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
104+
* advi_map: Initialize ADVI with MAP and use MAP as starting point.
105+
* map : Use the MAP as starting point.
106+
* nuts : Run NUTS and estimate posterior mean and covariance matrix.
107+
n_init : int
108+
Number of iterations of initializer
109+
If 'advi', number of iterations, if 'nuts', number of draws.
100110
start : dict
101111
Starting point in parameter space (or partial point)
102112
Defaults to trace.point(-1)) if there is a trace provided and
@@ -132,7 +142,14 @@ def sample(draws, step=None, start=None, trace=None, chain=0, njobs=1, tune=None
132142
"""
133143
model = modelcontext(model)
134144

135-
step = assign_step_methods(model, step)
145+
if step is None and init is not None and pm.model.all_continuous(model.vars):
146+
# By default, use NUTS sampler
147+
pm._log.info('Auto-assigning NUTS sampler...')
148+
start_, step = init_nuts(init=init, n_init=n_init, model=model)
149+
if start is None:
150+
start = start_
151+
else:
152+
step = assign_step_methods(model, step)
136153

137154
if njobs is None:
138155
import multiprocessing as mp
@@ -373,3 +390,63 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, random_see
373390
size=size))
374391

375392
return {k: np.asarray(v) for k, v in ppc.items()}
393+
394+
395+
def init_nuts(init='advi', n_init=500000, model=None):
396+
"""Initialize and sample from posterior of a continuous model.
397+
398+
This is a convenience function. NUTS convergence and sampling speed is extremely
399+
dependent on the choice of mass/scaling matrix. In our experience, using ADVI
400+
to estimate a diagonal covariance matrix and using this as the scaling matrix
401+
produces robust results over a wide class of continuous models.
402+
403+
Parameters
404+
----------
405+
init : str {'advi', 'advi_map', 'map', 'nuts'}
406+
Initialization method to use.
407+
* advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
408+
* advi_map: Initialize ADVI with MAP and use MAP as starting point.
409+
* map : Use the MAP as starting point.
410+
* nuts : Run NUTS and estimate posterior mean and covariance matrix.
411+
n_init : int
412+
Number of iterations of initializer
413+
If 'advi', number of iterations, if 'nuts', number of draws.
414+
model : Model (optional if in `with` context)
415+
416+
Returns
417+
-------
418+
start, nuts_sampler
419+
420+
start : pymc3.model.Point
421+
Starting point for sampler
422+
nuts_sampler : pymc3.step_methods.NUTS
423+
Instantiated and initialized NUTS sampler object
424+
"""
425+
426+
model = pm.modelcontext(model)
427+
428+
pm._log.info('Initializing NUTS using {}...'.format(init))
429+
430+
if init == 'advi':
431+
v_params = pm.variational.advi(n=n_init)
432+
start = pm.variational.sample_vp(v_params, 1)[0]
433+
cov = np.power(model.dict_to_array(v_params.stds), 2)
434+
elif init == 'advi_map':
435+
start = pm.find_MAP()
436+
v_params = pm.variational.advi(n=n_init, start=start)
437+
cov = np.power(model.dict_to_array(v_params.stds), 2)
438+
elif init == 'map':
439+
start = pm.find_MAP()
440+
cov = pm.find_hessian(point=start)
441+
442+
elif init == 'nuts':
443+
init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
444+
cov = pm.trace_cov(init_trace[n_init//2:])
445+
446+
start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames}
447+
else:
448+
raise NotImplementedError('Initializer {} is not supported.'.format(init))
449+
450+
step = pm.NUTS(scaling=cov, is_cov=True)
451+
452+
return start, step

pymc3/tests/test_diagnostics.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ def get_ptrace(self, n_samples):
2020
# Run sampler
2121
step1 = Slice([model.early_mean_log_, model.late_mean_log_])
2222
step2 = Metropolis([model.switchpoint])
23-
start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 50}
23+
start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 90}
2424
ptrace = sample(n_samples, [step1, step2], start, njobs=2, progressbar=False,
25-
random_seed=[1, 3])
25+
random_seed=[1, 4])
2626
return ptrace
2727

2828
def test_good(self):

pymc3/tests/test_sampling.py

+8
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,14 @@ def test_sample(self):
6161
for steps in [1, 10, 300]:
6262
pm.sample(steps, self.step, {}, None, njobs=njobs, random_seed=self.random_seed)
6363

64+
def test_sample_init(self):
65+
with self.model:
66+
for init in ('advi', 'advi_map', 'map', 'nuts'):
67+
pm.sample(init=init,
68+
n_init=1000, draws=50,
69+
random_seed=self.random_seed)
70+
71+
6472
def test_iter_sample(self):
6573
with self.model:
6674
samps = pm.sampling.iter_sample(5, self.step, self.start, random_seed=self.random_seed)

pymc3/tuning/scaling.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ def eig_recompose(val, vec):
138138
return vec.dot(np.diag(val)).dot(vec.T)
139139

140140

141-
def trace_cov(trace, vars=None):
141+
def trace_cov(trace, vars=None, model=None):
142142
"""
143143
Calculate the flattened covariance matrix using a sample trace
144144
@@ -155,9 +155,12 @@ def trace_cov(trace, vars=None):
155155
r : array (n,n)
156156
covariance matrix
157157
"""
158+
model = modelcontext(model)
158159

159-
if vars is None:
160-
vars = trace.samples.keys
160+
if model is not None:
161+
vars = model.free_RVs
162+
elif vars is None:
163+
vars = trace.varnames
161164

162165
def flat_t(var):
163166
x = trace[str(var)]

pymc3/variational/advi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False,
110110
vars = model.vars
111111
vars = pm.inputvars(vars)
112112

113-
check_discrete_rvs(vars)
113+
if not pm.model.all_continuous(vars):
114+
raise ValueError('Model should not include discrete RVs for ADVI.')
114115

115116
n_mcsamples = 100 if accurate_elbo else 1
116117

0 commit comments

Comments
 (0)