Sample init #1523
@@ -16,7 +16,7 @@
 import sys
 sys.setrecursionlimit(10000)

-__all__ = ['sample', 'iter_sample', 'sample_ppc']
+__all__ = ['sample', 'iter_sample', 'sample_ppc', 'init_nuts']


 def assign_step_methods(model, step=None, methods=(NUTS, HamiltonianMC, Metropolis,
@@ -81,8 +81,9 @@ def assign_step_methods(model, step=None, methods=(NUTS, HamiltonianMC, Metropol
     return steps


-def sample(draws, step=None, start=None, trace=None, chain=0, njobs=1, tune=None,
-           progressbar=True, model=None, random_seed=-1):
+def sample(draws, step=None, init='advi', n_init=500000, start=None,
+           trace=None, chain=0, njobs=1, tune=None, progressbar=True,
+           model=None, random_seed=-1):
     """
     Draw a number of samples using the given step method.
     Multiple step methods supported via compound step method
@@ -97,6 +98,15 @@ def sample(draws, step=None, start=None, trace=None, chain=0, njobs=1, tune=None
         A step function or collection of functions. If no step methods are
         specified, or are partially specified, they will be assigned
         automatically (defaults to None).
+    init : str {'advi', 'advi_map', 'map', 'nuts'}
+        Initialization method to use.
+        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
+        * advi_map : Initialize ADVI with MAP and use MAP as starting point.
+        * map : Use the MAP as starting point.
+        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
+    n_init : int
+        Number of iterations of initializer.
+        If 'advi', number of iterations, if 'nuts', number of draws.
     start : dict
         Starting point in parameter space (or partial point)
         Defaults to trace.point(-1)) if there is a trace provided and
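As a usage sketch of the new arguments (the model below is made up purely for illustration), the default call would now look like this:

```python
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10)
    sd = pm.HalfNormal('sd', sd=10)
    pm.Normal('obs', mu=mu, sd=sd, observed=np.random.randn(100))

    # With no step method given, sample() runs ADVI for n_init
    # iterations, then starts NUTS from an ADVI posterior draw, using
    # the estimated variances as the diagonal scaling matrix.
    trace = pm.sample(2000, init='advi', n_init=20000)
```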
@@ -132,7 +142,14 @@ def sample(draws, step=None, start=None, trace=None, chain=0, njobs=1, tune=None
     """
     model = modelcontext(model)

-    step = assign_step_methods(model, step)
+    if step is None and init is not None and pm.model.all_continuous(model.vars):
Review comment: Do we really need to check `all_continuous` here?

Reply: Good point. This really only handles the case of a continuous model where we can use NUTS. We might want to extend it for discrete models too. Not sure it should be part of this PR though.
+        # By default, use NUTS sampler
+        pm._log.info('Auto-assigning NUTS sampler...')
+        start_, step = init_nuts(init=init, n_init=n_init, model=model)
+        if start is None:
+            start = start_
+    else:
+        step = assign_step_methods(model, step)

     if njobs is None:
         import multiprocessing as mp
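Conversely, passing a step method explicitly takes the `else` branch and skips the NUTS auto-initialization entirely. A small hypothetical example:

```python
import pymc3 as pm

with pm.Model():
    x = pm.Normal('x', mu=0, sd=1)
    # An explicit step method bypasses init_nuts() and goes through
    # assign_step_methods() exactly as before this change.
    trace = pm.sample(2000, step=pm.Metropolis())
```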
@@ -373,3 +390,63 @@ def sample_ppc(trace, samples=None, model=None, vars=None, size=None, random_see
                                          size=size))

     return {k: np.asarray(v) for k, v in ppc.items()}
+
+
+def init_nuts(init='advi', n_init=500000, model=None):
+    """Initialize and sample from posterior of a continuous model.
+
+    This is a convenience function. NUTS convergence and sampling speed is extremely
+    dependent on the choice of mass/scaling matrix. In our experience, using ADVI
+    to estimate a diagonal covariance matrix and using this as the scaling matrix
+    produces robust results over a wide class of continuous models.
+
+    Parameters
+    ----------
+    init : str {'advi', 'advi_map', 'map', 'nuts'}
+        Initialization method to use.
+        * advi : Run ADVI to estimate posterior mean and diagonal covariance matrix.
+        * advi_map : Initialize ADVI with MAP and use MAP as starting point.
+        * map : Use the MAP as starting point.
+        * nuts : Run NUTS and estimate posterior mean and covariance matrix.
+    n_init : int
+        Number of iterations of initializer.
+        If 'advi', number of iterations, if 'nuts', number of draws.
+    model : Model (optional if in `with` context)
+
+    Returns
+    -------
+    start : pymc3.model.Point
+        Starting point for sampler
+    nuts_sampler : pymc3.step_methods.NUTS
+        Instantiated and initialized NUTS sampler object
+    """
+    model = pm.modelcontext(model)
+
+    pm._log.info('Initializing NUTS using {}...'.format(init))
+
+    if init == 'advi':
+        v_params = pm.variational.advi(n=n_init)
+        start = pm.variational.sample_vp(v_params, 1)[0]
Review comment: @twiecki This new initialization is a very, very nice feature! One issue I have observed is that the first few samples (for transformed variables) are very far away from the correct values. I think the problem is related to `hide_transformed` here.

Reply: Ah, good point re `hide_transformed`. Also, I think it will still use the same starting point, because the parallelization happens downstream of here. We need to return N samples, where N is the number of chains/njobs. Want to help with that?

Reply: I could be wrong about the starting points being the same.

Reply: I see, it makes total sense to need N samples. I will try to take a look at this ASAP.
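A rough sketch of the fix discussed above, assuming `sample_vp` accepts `draws` and `hide_transformed` keyword arguments; the per-chain wiring is hypothetical:

```python
# Hypothetical sketch: draw one ADVI posterior sample per chain so that
# parallel chains start from distinct points, and keep transformed
# variables in the returned points.
v_params = pm.variational.advi(n=n_init)
vp_trace = pm.variational.sample_vp(v_params, draws=njobs,
                                    hide_transformed=False)
starts = [vp_trace[i] for i in range(njobs)]  # one start point per chain
# Each element of `starts` would then seed one chain, e.g.:
# pm.sample(draws, step=step, start=starts[i], chain=i)
```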
+        cov = np.power(model.dict_to_array(v_params.stds), 2)
+    elif init == 'advi_map':
+        start = pm.find_MAP()
+        v_params = pm.variational.advi(n=n_init, start=start)
+        cov = np.power(model.dict_to_array(v_params.stds), 2)
+    elif init == 'map':
+        start = pm.find_MAP()
+        cov = pm.find_hessian(point=start)
+    elif init == 'nuts':
+        init_trace = pm.sample(step=pm.NUTS(), draws=n_init)
+        cov = pm.trace_cov(init_trace[n_init//2:])
+
+        start = {varname: np.mean(init_trace[varname]) for varname in init_trace.varnames}
+    else:
+        raise NotImplementedError('Initializer {} is not supported.'.format(init))
+
+    step = pm.NUTS(scaling=cov, is_cov=True)
+
+    return start, step
Review comment: This convenience function looks good to me.

Reply: Why not call this (or a similar convenience function) from within NUTS? That way you can change whatever other params you want. Plus, then we can make similar calls within other samplers. It's better to have intelligent defaults than to have specialized initialization functions.

Reply: @jsalvatier I have considered it and I agree it would be better. The main reason is that ADVI not only provides us with the scaling but also the starting point (i.e. a sample from the posterior). Not sure how to do that if it lives inside NUTS.

Reply: I'm not sure exactly how to do it, but I'm sure it's possible in a semi-nice way. (If you're interested in copying my skill at making elegant things: I think it's exactly my habit of noticing that doing it another way would be nicer/less opaque, and then spending a bunch of time thinking about it, that generated that skill. I think it's probably worth it. If you're interested in gaining that skill, I can let you think about it for a while. I can think about it instead if you want, though; I don't mean to strong-arm you.)

Reply: Why don't we both think about it and see what we come up with.
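For reference, a minimal sketch of calling the new helper directly rather than through `sample()`; the model itself is made up for illustration:

```python
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=10)
    pm.Normal('obs', mu=mu, sd=1, observed=np.random.randn(50))

    # ADVI estimates the posterior stds; init_nuts squares them into a
    # diagonal scaling matrix and returns a ready-to-use NUTS step
    # together with a starting point drawn from the ADVI posterior.
    start, step = pm.init_nuts(init='advi', n_init=10000)
    trace = pm.sample(1000, step=step, start=start)
```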
@@ -110,7 +110,8 @@ def advi(vars=None, start=None, model=None, n=5000, accurate_elbo=False,
         vars = model.vars
     vars = pm.inputvars(vars)

-    check_discrete_rvs(vars)
+    if not pm.model.all_continuous(vars):
+        raise ValueError('Model should not include discrete RVs for ADVI.')
Review comment: This looks good to me; it's a useful check and helps the user use ADVI correctly.

Reply: I think little warnings like this make things like ADVI more "discoverable", i.e. without reading the documentation you will realize that this fancy method just doesn't work with discrete RVs, so you should use another method.

     n_mcsamples = 100 if accurate_elbo else 1
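To illustrate the new guard, a hypothetical model with a discrete variable now fails immediately instead of misbehaving later:

```python
import pymc3 as pm

with pm.Model():
    pm.Bernoulli('k', p=0.5)  # discrete RV
    # Raises: ValueError: Model should not include discrete RVs for ADVI.
    pm.variational.advi(n=1000)
```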
Review comment: This is a very clear docstring - good work :)

Review comment: What about initializing using the defaults for each parameter (i.e. median, mode, etc.)? I would even argue that it should be the default, as it is the most general across models.

Reply: @fonnesbeck Well, that's what we do currently, no? And it's not working well.

Reply: So, if `advi` is the default and there are discrete variables, what happens? We don't want the default to fail for entire classes of model.

Reply: `init` only matters for continuous models; if a model has discrete RVs, the behavior doesn't change from before: #1523 (diff)
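A sketch of that fallback with a hypothetical mixed model: the discrete variable makes the `all_continuous` check fail, so `sample()` assigns step methods automatically as before and `init` has no effect.

```python
import pymc3 as pm

with pm.Model():
    p = pm.Beta('p', alpha=1, beta=1)
    z = pm.Bernoulli('z', p=p)  # discrete RV: disables NUTS auto-init
    # Falls back to assign_step_methods(), e.g. NUTS for `p` and a
    # Metropolis variant for `z`; the `init` argument is ignored.
    trace = pm.sample(1000, init='advi')
```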