From 5696ed49a2fd50b7662a0089e6c2573a2600a8ac Mon Sep 17 00:00:00 2001 From: brandon willard Date: Fri, 6 May 2016 20:56:13 -0500 Subject: [PATCH 01/17] initial fix of shape logic in MvNormal --- pymc3/distributions/multivariate.py | 34 +++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 456f40be59..d6732d554c 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -97,6 +97,7 @@ def random(self, point=None, size=None): mu, cov = draw_values([self.mu, self.cov], point=point) def _random(mean, cov, size=None): + # FIXME: cov here is actually precision? return stats.multivariate_normal.rvs( mean, cov, None if size == mean.shape else size) @@ -118,6 +119,38 @@ def logp(self, value): result += (delta.dot(tau) * delta).sum(axis=delta.ndim - 1) return -1 / 2. * result + #def logp(self, value): + # mu = tt.as_tensor_variable(self.mu) + # tau = tt.as_tensor_variable(self.tau) + + # reps_shape_T = tau.shape[:-2] + # reps_shape_prod = tt.prod(reps_shape_T, keepdims=True) + # dist_shape_T = mu.shape[-1:] + + # # collapse reps dimensions + # flat_supp_shape = tt.concatenate([reps_shape_prod, dist_shape_T]) + # mus_collapsed = mu.reshape(flat_supp_shape, ndim=2) + # taus_collapsed = tau.reshape(tt.concatenate([reps_shape_prod, + # dist_shape_T, dist_shape_T]), ndim=3) + + # # force value to conform to reps_shape + # value_reshape = tt.ones_like(mu) * value + # values_collapsed = value_reshape.reshape(flat_supp_shape, ndim=2) + + # def single_logl(_mu, _tau, _value, k): + # delta = _value - _mu + # result = k * tt.log(2 * np.pi) + tt.log(det(_tau)) + # result += tt.square(delta.dot(_tau)).sum(axis=-1) + # return -result/2 + + # from theano import scan + # res, _ = scan(fn=single_logl + # , sequences=[mus_collapsed, taus_collapsed, values_collapsed] + # , non_sequences=[dist_shape_T] + # , strict=True + # ) + # return res.sum() + class MvStudentT(Continuous): R""" @@ -170,6 +203,7 @@ def logp(self, value): mu = self.mu d = S.shape[0] + n = value.shape[0] X = value - mu From 157125577b25bc4b225d63436a2966997d23ded6 Mon Sep 17 00:00:00 2001 From: brandon willard Date: Wed, 18 May 2016 15:59:02 -0500 Subject: [PATCH 02/17] initial signature/interface changes --- pymc3/distributions/continuous.py | 44 +++++++++++--------- pymc3/distributions/distribution.py | 64 +++++++++++++++++++++++------ pymc3/distributions/multivariate.py | 3 +- pymc3/distributions/timeseries.py | 6 +-- pymc3/model.py | 9 ++-- 5 files changed, 85 insertions(+), 41 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 58b4ae0947..88c37d1f0b 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -125,7 +125,7 @@ class Uniform(Continuous): def __init__(self, lower=0, upper=1, transform='interval', *args, **kwargs): - super(Uniform, self).__init__(*args, **kwargs) + super(Uniform, self).__init__(ndim=1, *args, **kwargs) self.lower = lower self.upper = upper @@ -157,7 +157,7 @@ class Flat(Continuous): """ def __init__(self, *args, **kwargs): - super(Flat, self).__init__(*args, **kwargs) + super(Flat, self).__init__(ndim=1, *args, **kwargs) self.median = 0 def random(self, point=None, size=None, repeat=None): @@ -200,8 +200,12 @@ class Normal(Continuous): tau : float Precision (tau > 0). 
""" +<<<<<<< 5696ed49a2fd50b7662a0089e6c2573a2600a8ac def __init__(self, *args, **kwargs): + + super(Normal, self).__init__(ndim=1, *args, **kwargs) + # FIXME In order to catch the case where Normal('x', 0, .1) is # called to display a warning we have to fetch the args and # kwargs manually. After a certain period we should revert @@ -270,9 +274,8 @@ class HalfNormal(PositiveContinuous): tau : float Precision (tau > 0). """ - def __init__(self, sd=None, tau=None, *args, **kwargs): - super(HalfNormal, self).__init__(*args, **kwargs) + super(HalfNormal, self).__init__(ndim=1, *args, **kwargs) self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. - 2 / np.pi) / self.tau @@ -351,7 +354,7 @@ class Wald(PositiveContinuous): """ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): - super(Wald, self).__init__(*args, **kwargs) + super(Wald, self).__init__(ndim=1, *args, **kwargs) self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha @@ -456,7 +459,7 @@ class Beta(UnitContinuous): def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): - super(Beta, self).__init__(*args, **kwargs) + super(Beta, self).__init__(ndim=1, *args, **kwargs) alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha @@ -519,7 +522,7 @@ class Exponential(PositiveContinuous): """ def __init__(self, lam, *args, **kwargs): - super(Exponential, self).__init__(*args, **kwargs) + super(Exponential, self).__init__(ndim=1, *args, **kwargs) self.lam = lam self.mean = 1. / lam self.median = self.mean * tt.log(2) @@ -564,7 +567,7 @@ class Laplace(Continuous): """ def __init__(self, mu, b, *args, **kwargs): - super(Laplace, self).__init__(*args, **kwargs) + super(Laplace, self).__init__(ndim=1, *args, **kwargs) self.b = b self.mean = self.median = self.mode = self.mu = mu @@ -613,9 +616,10 @@ class Lognormal(PositiveContinuous): tau : float Scale parameter (tau > 0). 
""" +<<<<<<< 5696ed49a2fd50b7662a0089e6c2573a2600a8ac def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): - super(Lognormal, self).__init__(*args, **kwargs) + super(Lognormal, self).__init__(ndim=1, *args, **kwargs) self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) @@ -677,7 +681,7 @@ class StudentT(Continuous): """ def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): - super(StudentT, self).__init__(*args, **kwargs) + super(StudentT, self).__init__(ndim=1, **args, **kwargs) self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu @@ -736,7 +740,7 @@ class Pareto(PositiveContinuous): """ def __init__(self, alpha, m, *args, **kwargs): - super(Pareto, self).__init__(*args, **kwargs) + super(Pareto, self).__init__(ndim=1, *args, **kwargs) self.alpha = alpha self.m = m self.mean = tt.switch(tt.gt(alpha, 1), alpha * @@ -797,7 +801,7 @@ class Cauchy(Continuous): """ def __init__(self, alpha, beta, *args, **kwargs): - super(Cauchy, self).__init__(*args, **kwargs) + super(Cauchy, self).__init__(ndim=1, *args, **kwargs) self.median = self.mode = self.alpha = alpha self.beta = beta @@ -844,7 +848,7 @@ class HalfCauchy(PositiveContinuous): """ def __init__(self, beta, *args, **kwargs): - super(HalfCauchy, self).__init__(*args, **kwargs) + super(HalfCauchy, self).__init__(ndim=1, *args, **kwargs) self.mode = 0 self.median = beta self.beta = beta @@ -909,7 +913,7 @@ class Gamma(PositiveContinuous): def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): - super(Gamma, self).__init__(*args, **kwargs) + super(Gamma, self).__init__(ndim=1, *args, **kwargs) alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -978,7 +982,7 @@ class InverseGamma(PositiveContinuous): """ def __init__(self, alpha, beta=1, *args, **kwargs): - super(InverseGamma, self).__init__(*args, **kwargs) + super(InverseGamma, self).__init__(ndim=1, *args, **kwargs) self.alpha = alpha self.beta = beta self.mean = self._calculate_mean() @@ -1034,7 +1038,7 @@ class ChiSquared(Gamma): def __init__(self, nu, *args, **kwargs): self.nu = nu - super(ChiSquared, self).__init__(alpha=nu / 2., beta=0.5, + super(ChiSquared, self).__init__(ndim=1, alpha=nu / 2., beta=0.5, *args, **kwargs) @@ -1063,7 +1067,7 @@ class Weibull(PositiveContinuous): """ def __init__(self, alpha, beta, *args, **kwargs): - super(Weibull, self).__init__(*args, **kwargs) + super(Weibull, self).__init__(ndim=1, *args, **kwargs) self.alpha = alpha self.beta = beta self.mean = beta * tt.exp(gammaln(1 + 1. 
/ alpha)) @@ -1256,7 +1260,7 @@ class ExGaussian(Continuous): """ def __init__(self, mu, sigma, nu, *args, **kwargs): - super(ExGaussian, self).__init__(*args, **kwargs) + super(ExGaussian, self).__init__(ndim=1, *args, **kwargs) self.mu = mu self.sigma = sigma self.nu = nu @@ -1318,7 +1322,7 @@ class VonMises(Continuous): def __init__(self, mu=0.0, kappa=None, transform='circular', *args, **kwargs): - super(VonMises, self).__init__(*args, **kwargs) + super(VonMises, self).__init__(ndim=1, *args, **kwargs) self.mean = self.median = self.mode = self.mu = mu self.kappa = kappa self.variance = 1 - i1(kappa) / i0(kappa) @@ -1381,7 +1385,7 @@ class SkewNormal(Continuous): """ def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs): - super(SkewNormal, self).__init__(*args, **kwargs) + super(SkewNormal, self).__init__(ndim=1, *args, **kwargs) self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.alpha = alpha diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 61570ceaa3..be89370c75 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -41,15 +41,52 @@ def dist(cls, *args, **kwargs): dist.__init__(*args, **kwargs) return dist - def __init__(self, shape, dtype, testval=None, defaults=[], transform=None): - self.shape = np.atleast_1d(shape) - if False in (np.floor(self.shape) == self.shape): - raise TypeError("Expected int elements in shape") + def __init__(self, ndim, size, dtype, testval=None, defaults=[], transform=None): + """ + Parameters + ---------- + ndim + Dimensions of the support for this distribution type. + size + Shape/dimensions of the space that will contain + independent draws under this distribution. + dtype + The Theano data type. + testval + Test value to be added to the Theano variable's tag.test_value. + defaults + List of string names for attributes that can be used as this + distribution's default value. + transform + A random variable transform to be applied? + """ + # + # Following Theano Op's handling of shape parameters (well, at least + # theano.tensor.raw_random.RandomFunction). 
+ # + #support_shape_ = T.as_tensor_variable(, ndim=1) + #if support_shape == (): + # self.support_shape = support_shape_.astype('int64') + #else: + # self.support_shape = support_shape_ + + #if self.support_shape.type.ndim != 1 or \ + # not (self.support_shape.type.dtype == 'int64') and \ + # not (self.support_shape.type.dtype == 'int32'): + # raise TypeError("Expected int elements in shape") + + from theano.tensor.raw_random import _infer_ndim_bcast + import ipdb; ipdb.set_trace() # XXX BREAKPOINT + + ndim, size, bcast = _infer_ndim_bcast(ndim, size, testval) + + self.ndim = ndim + self.size = size self.dtype = dtype - self.type = TensorType(self.dtype, self.shape) self.testval = testval self.defaults = defaults self.transform = transform + self.type = T.TensorType(str(self.dtype), bcast) def default(self): return self.get_test_val(self.testval, self.defaults) @@ -104,33 +141,36 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - def __init__(self, shape=(), dtype='int64', defaults=['mode'], *args, **kwargs): + def __init__(self, ndim, size=(), dtype='int64', defaults=['mode'], *args, **kwargs): if dtype != 'int64': raise TypeError('Discrete classes expect dtype to be int64.') super(Discrete, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + ndim, size, dtype, defaults=defaults, *args, **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, shape=(), dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): + def __init__(self, ndim, size=(), dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): super(Continuous, self).__init__( - shape, dtype, defaults=defaults, *args, **kwargs) + ndim, size, dtype, defaults=defaults, *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, shape=(), dtype='float64', testval=0, *args, **kwargs): + def __init__(self, logp, ndim, size=(), dtype='float64', testval=0, *args, **kwargs): super(DensityDist, self).__init__( - shape, dtype, testval, *args, **kwargs) + ndim, size, dtype, testval, *args, **kwargs) self.logp = logp class MultivariateContinuous(Continuous): - pass + def __init__(self, ndim, size=(), *args, **kwargs): + super(MultivariateContinuous, self).__init__( + ndim, size, *args, **kwargs) + class MultivariateDiscrete(Discrete): diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index d6732d554c..f5bc060045 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -59,7 +59,7 @@ def get_tau_cov(mu, tau=None, cov=None): return (tau, cov) class MvNormal(Continuous): - R""" + r""" Multivariate normal log-likelihood. .. 
math:: @@ -363,6 +363,7 @@ def logp(self, x): ) + def posdef(AA): try: np.linalg.cholesky(AA) diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 8dd2e282fb..94fc95bb34 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -20,7 +20,7 @@ class AR1(Continuous): """ def __init__(self, k, tau_e, *args, **kwargs): - super(AR1, self).__init__(*args, **kwargs) + super(AR1, self).__init__(ndim=1, size=(), *args, **kwargs) self.k = k self.tau_e = tau_e self.tau = tau_e * (1 - k ** 2) @@ -56,7 +56,7 @@ class GaussianRandomWalk(Continuous): def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0., *args, **kwargs): - super(GaussianRandomWalk, self).__init__(*args, **kwargs) + super(GaussianRandomWalk, self).__init__(ndim=1, size=(), *args, **kwargs) self.tau = tau self.sd = sd self.mu = mu @@ -100,7 +100,7 @@ class GARCH11(Continuous): def __init__(self, omega=None, alpha_1=None, beta_1=None, initial_vol=None, *args, **kwargs): - super(GARCH11, self).__init__(*args, **kwargs) + super(GARCH11, self).__init__(ndim=1, size=(), *args, **kwargs) self.omega = omega self.alpha_1 = alpha_1 diff --git a/pymc3/model.py b/pymc3/model.py index d28f2340e0..fa02986b1c 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -500,11 +500,11 @@ def __init__(self, type=None, owner=None, index=None, name=None, super(FreeRV, self).__init__(type, owner, index, name) if distribution is not None: - self.dshape = tuple(distribution.shape) - self.dsize = int(np.prod(distribution.shape)) + self.dshape = distribution.shape + self.dsize = tt.prod(distribution.shape) self.distribution = distribution - self.tag.test_value = np.ones( - distribution.shape, distribution.dtype) * distribution.default() + dist_test_val = distribution.default() + self.tag.test_value = np.ones(dist_test_val.shape, distribution.dtype) * dist_test_val self.logp_elemwiset = distribution.logp(self) self.model = model @@ -579,7 +579,6 @@ def __init__(self, type=None, owner=None, index=None, name=None, data=None, super(TensorVariable, self).__init__(type, None, None, name) if distribution is not None: - data = as_tensor(data, name, model, distribution) self.missing_values = data.missing_values From 02696f72604ff2ce5e7eed19817ef2fa91446f0f Mon Sep 17 00:00:00 2001 From: brandon willard Date: Sat, 21 May 2016 19:23:00 -0500 Subject: [PATCH 03/17] initial distribution shape refactoring --- pymc3/distributions/continuous.py | 271 ++++++++++++++++------------ pymc3/distributions/discrete.py | 136 ++++++++------ pymc3/distributions/distribution.py | 160 ++++++++++++---- pymc3/distributions/multivariate.py | 201 +++++++++++++-------- pymc3/model.py | 9 +- 5 files changed, 484 insertions(+), 293 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 88c37d1f0b..db6975e8ea 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -14,7 +14,7 @@ from . 
import transforms from .dist_math import bound, logpow, gammaln, betaln, std_cdf, i0, i1 -from .distribution import Continuous, draw_values, generate_samples +from .distribution import UnivariateContinuous, draw_values, generate_samples __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace', 'StudentT', 'Cauchy', 'HalfCauchy', 'Gamma', 'Weibull', @@ -23,20 +23,20 @@ 'VonMises', 'SkewNormal'] -class PositiveContinuous(Continuous): - """Base class for positive continuous distributions""" +class PositiveUnivariateContinuous(UnivariateContinuous): + """Base class for positive univariate continuous distributions""" - def __init__(self, transform=transforms.log, *args, **kwargs): - super(PositiveContinuous, self).__init__( - transform=transform, *args, **kwargs) + def __init__(self, *args, **kwargs): + transform = kwargs.get('transform', transforms.log) + super(PositiveUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) -class UnitContinuous(Continuous): - """Base class for continuous distributions on [0,1]""" +class UnitUnivariateContinuous(UnivariateContinuous): + """Base class for univariate continuous distributions in [0,1]""" - def __init__(self, transform=transforms.logodds, *args, **kwargs): - super(UnitContinuous, self).__init__( - transform=transform, *args, **kwargs) + def __init__(self, *args, **kwargs): + transform = kwargs.get('transform', transforms.logodds) + super(UnitUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) def assert_negative_support(var, label, distname, value=-1e-6): # Checks for evidence of positive support for a variable @@ -98,10 +98,10 @@ def get_tau_sd(tau=None, sd=None): tau = 1. * tau sd = 1. * sd - return (tau, sd) + return (tt.as_tensor_variable(tau), tt.as_tensor_variable(sd)) -class Uniform(Continuous): +class Uniform(UnivariateContinuous): R""" Continuous uniform log-likelihood. @@ -123,15 +123,18 @@ class Uniform(Continuous): Upper limit. """ - def __init__(self, lower=0, upper=1, transform='interval', - *args, **kwargs): - super(Uniform, self).__init__(ndim=1, *args, **kwargs) + def __init__(self, lower=0, upper=1, transform='interval', size=None, ndim=None, dtype=None, *args, **kwargs): + lower = tt.as_tensor_variable(lower) + upper = tt.as_tensor_variable(upper) + self.dist_params = (lower, upper) self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. self.median = self.mean + super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -150,15 +153,18 @@ def logp(self, value): value >= lower, value <= upper) -class Flat(Continuous): +class Flat(UnivariateContinuous): """ Uninformative log-likelihood that returns 0 regardless of the passed value. """ - def __init__(self, *args, **kwargs): - super(Flat, self).__init__(ndim=1, *args, **kwargs) - self.median = 0 + def __init__(self, ndim=None, size=None, dtype=None, *args, **kwargs): + + self.median = tt.as_tensor_variable(0.) + self.dist_params = (self.median,) + + super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') @@ -167,7 +173,7 @@ def logp(self, value): return tt.zeros_like(value) -class Normal(Continuous): +class Normal(UnivariateContinuous): R""" Univariate normal log-likelihood. @@ -200,12 +206,7 @@ class Normal(Continuous): tau : float Precision (tau > 0). 
""" -<<<<<<< 5696ed49a2fd50b7662a0089e6c2573a2600a8ac - def __init__(self, *args, **kwargs): - - super(Normal, self).__init__(ndim=1, *args, **kwargs) - # FIXME In order to catch the case where Normal('x', 0, .1) is # called to display a warning we have to fetch the args and # kwargs manually. After a certain period we should revert @@ -234,7 +235,9 @@ def __init__(self, *args, **kwargs): assert_negative_support(sd, 'sd', 'Normal') assert_negative_support(tau, 'tau', 'Normal') - super(Normal, self).__init__(**kwargs) + self.dist_params = (self.mu, self.tau) + + super(Normal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -251,7 +254,7 @@ def logp(self, value): sd > 0) -class HalfNormal(PositiveContinuous): +class HalfNormal(PositiveUnivariateContinuous): R""" Half-normal log-likelihood. @@ -274,8 +277,7 @@ class HalfNormal(PositiveContinuous): tau : float Precision (tau > 0). """ - def __init__(self, sd=None, tau=None, *args, **kwargs): - super(HalfNormal, self).__init__(ndim=1, *args, **kwargs) + def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. - 2 / np.pi) / self.tau @@ -283,6 +285,10 @@ def __init__(self, sd=None, tau=None, *args, **kwargs): assert_negative_support(tau, 'tau', 'HalfNormal') assert_negative_support(sd, 'sd', 'HalfNormal') + self.dist_params = (self.tau,) + + super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): sd = draw_values([self.sd], point=point) return generate_samples(stats.halfnorm.rvs, loc=0., scale=sd, @@ -297,7 +303,7 @@ def logp(self, value): tau > 0, sd > 0) -class Wald(PositiveContinuous): +class Wald(PositiveUnivariateContinuous): R""" Wald log-likelihood. @@ -353,8 +359,8 @@ class Wald(PositiveContinuous): The American Statistician, Vol. 30, No. 2, pp. 88-90 """ - def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): - super(Wald, self).__init__(ndim=1, *args, **kwargs) + def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, size=None, dtype=None, *args, **kwargs): + self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha @@ -366,22 +372,29 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): assert_negative_support(mu, 'mu', 'Wald') assert_negative_support(lam, 'lam', 'Wald') + self.dist_params = (self.mu, self.lam, self.alpha) + + super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def get_mu_lam_phi(self, mu, lam, phi): + res = None if mu is None: if lam is not None and phi is not None: - return lam / phi, lam, phi + res = (lam / phi, lam, phi) else: if lam is None: if phi is None: - return mu, 1., 1. / mu + res = (mu, 1., 1. 
/ mu) else: - return mu, mu * phi, phi + res = (mu, mu * phi, phi) else: if phi is None: - return mu, lam, lam / mu + res = (mu, lam, lam / mu) - raise ValueError('Wald distribution must specify either mu only, ' - 'mu and lam, mu and phi, or lam and phi.') + if res is None: + raise ValueError('Wald distribution must specify either mu only, ' + 'mu and lam, mu and phi, or lam and phi.') + return map(tt.as_tensor_variable, res) def _random(self, mu, lam, alpha, size=None): v = np.random.normal(size=size)**2 @@ -414,7 +427,7 @@ def logp(self, value): mu > 0, lam > 0, alpha >= 0) -class Beta(UnitContinuous): +class Beta(UnitUnivariateContinuous): R""" Beta log-likelihood. @@ -457,10 +470,7 @@ class Beta(UnitContinuous): the binomial distribution. """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): - super(Beta, self).__init__(ndim=1, *args, **kwargs) - + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -471,6 +481,10 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, assert_negative_support(alpha, 'alpha', 'Beta') assert_negative_support(beta, 'beta', 'Beta') + self.dist_params = (self.alpha, self.beta) + + super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass @@ -482,7 +496,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): raise ValueError('Incompatible parameterization. Either use alpha ' 'and beta, or mu and sd to specify distribution.') - return alpha, beta + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -501,7 +515,7 @@ def logp(self, value): alpha > 0, beta > 0) -class Exponential(PositiveContinuous): +class Exponential(PositiveUnivariateContinuous): R""" Exponential log-likelihood. @@ -521,17 +535,20 @@ class Exponential(PositiveContinuous): Rate or inverse scale (lam > 0) """ - def __init__(self, lam, *args, **kwargs): - super(Exponential, self).__init__(ndim=1, *args, **kwargs) + def __init__(self, lam, ndim=None, size=None, dtype=None, *args, **kwargs): self.lam = lam self.mean = 1. / lam self.median = self.mean * tt.log(2) self.mode = 0 + self.dist_params = (self.lam,) + self.variance = lam**-2 assert_negative_support(lam, 'lam', 'Exponential') + super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): lam = draw_values([self.lam], point=point) return generate_samples(np.random.exponential, scale=1. / lam, @@ -543,7 +560,7 @@ def logp(self, value): return bound(tt.log(lam) - lam * value, value > 0, lam > 0) -class Laplace(Continuous): +class Laplace(UnivariateContinuous): R""" Laplace log-likelihood. @@ -566,15 +583,18 @@ class Laplace(Continuous): Scale parameter (b > 0). 
""" - def __init__(self, mu, b, *args, **kwargs): - super(Laplace, self).__init__(ndim=1, *args, **kwargs) - self.b = b - self.mean = self.median = self.mode = self.mu = mu - + def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, **kwargs): + self.b = tt.as_tensor_variable(b) + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( + mu) self.variance = 2 * b**2 assert_negative_support(b, 'b', 'Laplace') + self.dist_params = (self.b, self.mu) + + super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) return generate_samples(np.random.laplace, mu, b, @@ -588,7 +608,7 @@ def logp(self, value): return -tt.log(2 * b) - abs(value - mu) / b -class Lognormal(PositiveContinuous): +class Lognormal(PositiveUnivariateContinuous): R""" Log-normal log-likelihood. @@ -616,11 +636,7 @@ class Lognormal(PositiveContinuous): tau : float Scale parameter (tau > 0). """ -<<<<<<< 5696ed49a2fd50b7662a0089e6c2573a2600a8ac - - def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): - super(Lognormal, self).__init__(ndim=1, *args, **kwargs) - + def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, **kwargs): self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) @@ -632,6 +648,10 @@ def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): assert_negative_support(tau, 'tau', 'Lognormal') assert_negative_support(sd, 'sd', 'Lognormal') + self.dist_params = (self.mu, self.tau) + + super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) return np.exp(mu + (tau**-0.5) * samples) @@ -651,13 +671,13 @@ def logp(self, value): tau > 0) -class StudentT(Continuous): - R""" +class StudentT(UnivariateContinuous): + r""" Non-central Student's T log-likelihood. Describes a normal variable whose precision is gamma distributed. If only nu parameter is passed, this specifies a standard (central) - Student's T. + Student's tt. .. math:: @@ -680,8 +700,8 @@ class StudentT(Continuous): Scale parameter (lam > 0). """ - def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): - super(StudentT, self).__init__(ndim=1, **args, **kwargs) + def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu @@ -693,6 +713,9 @@ def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): assert_negative_support(lam, 'lam (sd)', 'StudentT') assert_negative_support(nu, 'nu', 'StudentT') + self.dist_params = (self.nu, self.mu, self.lam) + super(StudentT, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point) @@ -713,7 +736,7 @@ def logp(self, value): lam > 0, nu > 0, sd > 0) -class Pareto(PositiveContinuous): +class Pareto(PositiveUnivariateContinuous): R""" Pareto log-likelihood. @@ -738,13 +761,10 @@ class Pareto(PositiveContinuous): m : float Scale parameter (m > 0). 
""" - - def __init__(self, alpha, m, *args, **kwargs): - super(Pareto, self).__init__(ndim=1, *args, **kwargs) - self.alpha = alpha - self.m = m - self.mean = tt.switch(tt.gt(alpha, 1), alpha * - m / (alpha - 1.), np.inf) + def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.m = tt.as_tensor_variable(m) + self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.), np.inf) self.median = m * 2.**(1. / alpha) self.variance = tt.switch( tt.gt(alpha, 2), @@ -754,6 +774,9 @@ def __init__(self, alpha, m, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'Pareto') assert_negative_support(m, 'm', 'Pareto') + self.dist_params = (self.alpha, self.m) + + super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -774,7 +797,7 @@ def logp(self, value): value >= m, alpha > 0, m > 0) -class Cauchy(Continuous): +class Cauchy(UnivariateContinuous): R""" Cauchy log-likelihood. @@ -800,10 +823,13 @@ class Cauchy(Continuous): Scale parameter > 0 """ - def __init__(self, alpha, beta, *args, **kwargs): - super(Cauchy, self).__init__(ndim=1, *args, **kwargs) - self.median = self.mode = self.alpha = alpha - self.beta = beta + def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): + self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + + self.dist_params = (self.alpha, self.beta) + + super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) assert_negative_support(beta, 'beta', 'Cauchy') @@ -826,7 +852,7 @@ def logp(self, value): beta > 0) -class HalfCauchy(PositiveContinuous): +class HalfCauchy(PositiveUnivariateContinuous): R""" Half-Cauchy log-likelihood. @@ -847,11 +873,14 @@ class HalfCauchy(PositiveContinuous): Scale parameter (beta > 0). """ - def __init__(self, beta, *args, **kwargs): - super(HalfCauchy, self).__init__(ndim=1, *args, **kwargs) - self.mode = 0 + def __init__(self, beta, ndim=None, size=None, dtype=None, *args, **kwargs): + self.mode = tt.as_tensor_variable(0.) + self.beta = tt.as_tensor_variable(beta) self.median = beta - self.beta = beta + + self.dist_params = (self.beta,) + + super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) assert_negative_support(beta, 'beta', 'HalfCauchy') @@ -872,7 +901,7 @@ def logp(self, value): value >= 0, beta > 0) -class Gamma(PositiveContinuous): +class Gamma(PositiveUnivariateContinuous): R""" Gamma log-likelihood. @@ -911,9 +940,7 @@ class Gamma(PositiveContinuous): Alternative scale parameter (sd > 0). 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, - *args, **kwargs): - super(Gamma, self).__init__(ndim=1, *args, **kwargs) + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -924,6 +951,10 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, assert_negative_support(alpha, 'alpha', 'Gamma') assert_negative_support(beta, 'beta', 'Gamma') + self.dist_params = (self.alpha, self.beta) + + super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): pass @@ -935,7 +966,7 @@ def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): 'alpha and beta, or mu and sd to specify ' 'distribution.') - return alpha, beta + return tt.as_tensor_variable(alpha), tt.as_tensor_variable(beta) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -956,7 +987,7 @@ def logp(self, value): beta > 0) -class InverseGamma(PositiveContinuous): +class InverseGamma(PositiveUnivariateContinuous): R""" Inverse gamma log-likelihood, the reciprocal of the gamma distribution. @@ -980,9 +1011,7 @@ class InverseGamma(PositiveContinuous): beta : float Scale parameter (beta > 0). """ - - def __init__(self, alpha, beta=1, *args, **kwargs): - super(InverseGamma, self).__init__(ndim=1, *args, **kwargs) + def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kwargs): self.alpha = alpha self.beta = beta self.mean = self._calculate_mean() @@ -993,6 +1022,8 @@ def __init__(self, alpha, beta=1, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'InverseGamma') assert_negative_support(beta, 'beta', 'InverseGamma') + super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def _calculate_mean(self): m = self.beta / (self.alpha - 1.) try: @@ -1001,6 +1032,9 @@ def _calculate_mean(self): m[self.alpha <= 1] = np.inf return m + self.dist_params = (self.alpha, self.beta) + + def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], point=point) @@ -1037,12 +1071,11 @@ class ChiSquared(Gamma): """ def __init__(self, nu, *args, **kwargs): - self.nu = nu - super(ChiSquared, self).__init__(ndim=1, alpha=nu / 2., beta=0.5, - *args, **kwargs) + self.nu = tt.as_tensor_variable(nu) + super(ChiSquared, self).__init__(alpha=self.nu / 2., beta=tt.as_tensor_variable(0.5), *args, **kwargs) -class Weibull(PositiveContinuous): +class Weibull(PositiveUnivariateContinuous): R""" Weibull log-likelihood. @@ -1065,11 +1098,9 @@ class Weibull(PositiveContinuous): beta : float Scale parameter (beta > 0). """ - - def __init__(self, alpha, beta, *args, **kwargs): - super(Weibull, self).__init__(ndim=1, *args, **kwargs) - self.alpha = alpha - self.beta = beta + def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) self.median = beta * tt.exp(gammaln(tt.log(2)))**(1. 
/ alpha) self.variance = (beta**2) * \ @@ -1078,6 +1109,10 @@ def __init__(self, alpha, beta, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'Weibull') assert_negative_support(beta, 'beta', 'Weibull') + self.dist_params = (self.alpha, self.beta) + + super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], point=point) @@ -1098,7 +1133,7 @@ def logp(self, value): value >= 0, alpha > 0, beta > 0) -class Bounded(Continuous): +class Bounded(UnivariateContinuous): R""" An upper, lower or upper+lower bounded distribution @@ -1214,7 +1249,7 @@ def StudentTpos(*args, **kwargs): HalfStudentT = Bound(StudentT, lower=0) -class ExGaussian(Continuous): +class ExGaussian(UnivariateContinuous): R""" Exponentially modified Gaussian log-likelihood. @@ -1259,17 +1294,20 @@ class ExGaussian(Continuous): Vol. 4, No. 1, pp 35-45. """ - def __init__(self, mu, sigma, nu, *args, **kwargs): - super(ExGaussian, self).__init__(ndim=1, *args, **kwargs) - self.mu = mu - self.sigma = sigma - self.nu = nu + def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.sigma = tt.as_tensor_variable(sigma) + self.nu = tt.as_tensor_variable(nu) self.mean = mu + nu self.variance = (sigma**2) + (nu**2) assert_negative_support(sigma, 'sigma', 'ExGaussian') assert_negative_support(nu, 'nu', 'ExGaussian') + self.dist_params = (self.mu, self.sigma, self.nu) + + super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], point=point) @@ -1296,7 +1334,7 @@ def logp(self, value): return bound(lp, sigma > 0., nu > 0.) -class VonMises(Continuous): +class VonMises(UnivariateContinuous): R""" Univariate VonMises log-likelihood. @@ -1320,12 +1358,15 @@ class VonMises(Continuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', - *args, **kwargs): - super(VonMises, self).__init__(ndim=1, *args, **kwargs) - self.mean = self.median = self.mode = self.mu = mu - self.kappa = kappa - self.variance = 1 - i1(kappa) / i0(kappa) + def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, size=None, dtype=None, *args, **kwargs): + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( + mu) + self.kappa = tt.as_tensor_variable(kappa) + self.variance = 1. - i1(kappa) / i0(kappa) + + self.dist_params = (self.mu, self.kappa) + + super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index edfdfaecb9..cf0a56f597 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -6,7 +6,7 @@ from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow -from .distribution import Discrete, draw_values, generate_samples +from .distribution import UnivariateDiscrete, draw_values, generate_samples __all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', 'ZeroInflatedPoisson', @@ -14,7 +14,7 @@ 'Categorical'] -class Binomial(Discrete): +class Binomial(UnivariateDiscrete): R""" Binomial log-likelihood. 
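The hunks below repeat one constructor pattern for every univariate discrete distribution: convert each parameter with tt.as_tensor_variable, collect the converted parameters in self.dist_params, and hand them to the new UnivariateDiscrete base class together with ndim, size and dtype so the total shape can be inferred from the parameters. A minimal sketch of that pattern, using a hypothetical MyGeometric class and assuming the patched UnivariateDiscrete base class from this series is importable:

import theano.tensor as tt
from pymc3.distributions.distribution import UnivariateDiscrete

class MyGeometric(UnivariateDiscrete):
    """Hypothetical distribution showing the refactored constructor pattern."""

    def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs):
        # Parameters become symbolic tensors up front.
        self.p = tt.as_tensor_variable(p)
        self.mode = tt.as_tensor_variable(1)

        # The base class infers ndim, size and the broadcast pattern from these.
        self.dist_params = (self.p,)

        super(MyGeometric, self).__init__(self.dist_params, ndim, size, dtype,
                                          *args, **kwargs)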
@@ -37,13 +37,15 @@ class Binomial(Discrete): p : float Probability of success in each trial (0 < p < 1). """ - - def __init__(self, n, p, *args, **kwargs): - super(Binomial, self).__init__(*args, **kwargs) - self.n = n - self.p = p + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + self.n = tt.as_tensor_variable(n) + self.p = tt.as_tensor_variable(p) self.mode = tt.cast(tt.round(n * p), self.dtype) + self.dist_params = (self.n, self.p) + + super(Binomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) return generate_samples(stats.binom.rvs, n=n, p=p, @@ -60,7 +62,7 @@ def logp(self, value): 0 <= p, p <= 1) -class BetaBinomial(Discrete): +class BetaBinomial(UnivariateDiscrete): R""" Beta-binomial log-likelihood. @@ -88,14 +90,16 @@ class BetaBinomial(Discrete): beta : float beta > 0. """ - - def __init__(self, alpha, beta, n, *args, **kwargs): - super(BetaBinomial, self).__init__(*args, **kwargs) - self.alpha = alpha - self.beta = beta - self.n = n + def __init__(self, alpha, beta, n, ndim=None, size=None, dtype=None, *args, **kwargs): + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.n = tt.as_tensor_variable(n) self.mode = tt.cast(tt.round(alpha / (alpha + beta)), 'int8') + self.dist_params = (self.alpha, self.beta, self.n) + + super(BetaBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def _random(self, alpha, beta, n, size=None): size = size or 1 p = np.atleast_1d(stats.beta.rvs(a=alpha, b=beta, size=np.prod(size))) @@ -125,7 +129,7 @@ def logp(self, value): alpha > 0, beta > 0) -class Bernoulli(Discrete): +class Bernoulli(UnivariateDiscrete): R"""Bernoulli log-likelihood The Bernoulli distribution describes the probability of successes @@ -144,12 +148,14 @@ class Bernoulli(Discrete): p : float Probability of success (0 < p < 1). """ - - def __init__(self, p, *args, **kwargs): - super(Bernoulli, self).__init__(*args, **kwargs) - self.p = p + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): + self.p = tt.as_tensor_variable(p) self.mode = tt.cast(tt.round(p), 'int8') + self.dist_params = (self.p,) + # FIXME: bcast, etc. + super(Bernoulli, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) return generate_samples(stats.bernoulli.rvs, p, @@ -164,7 +170,7 @@ def logp(self, value): p >= 0, p <= 1) -class Poisson(Discrete): +class Poisson(UnivariateDiscrete): R""" Poisson log-likelihood. @@ -190,12 +196,14 @@ class Poisson(Discrete): The Poisson distribution can be derived as a limiting case of the binomial distribution. """ - - def __init__(self, mu, *args, **kwargs): - super(Poisson, self).__init__(*args, **kwargs) - self.mu = mu + def __init__(self, mu, ndim=None, size=None, dtype=None, *args, **kwargs): + self.mu = tt.as_tensor_variable(mu) self.mode = tt.floor(mu).astype('int32') + self.dist_params = (self.mu,) + + super(Poisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) return generate_samples(stats.poisson.rvs, mu, @@ -212,7 +220,7 @@ def logp(self, value): 0, log_prob) -class NegativeBinomial(Discrete): +class NegativeBinomial(UnivariateDiscrete): R""" Negative binomial log-likelihood. 
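The dist_params tuples built in these hunks are what the new base classes (see the distribution.py changes below) feed to Theano's shape-inference helper. A rough standalone sketch of what that helper returns, assuming theano.tensor.raw_random._infer_ndim_bcast keeps its usual (ndim, size, *args) -> (ndim, size, bcast) behaviour; the concrete sizes here are arbitrary:

import numpy as np
import theano.tensor as tt
from theano.tensor.raw_random import _infer_ndim_bcast

# A matrix-valued parameter, as a scalar distribution's mu might be.
mu = tt.as_tensor_variable(np.zeros((4, 3)))

# Combine an explicit size with the parameter's shape; the returned bcast
# tuple is what Distribution.__init__ uses to build the TensorType.
ndim, size, bcast = _infer_ndim_bcast(None, (2, 4, 3), mu)
print(ndim, bcast)  # expected: 3 (False, False, False)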
@@ -237,13 +245,15 @@ class NegativeBinomial(Discrete): alpha : float Gamma distribution parameter (alpha > 0). """ - - def __init__(self, mu, alpha, *args, **kwargs): - super(NegativeBinomial, self).__init__(*args, **kwargs) - self.mu = mu - self.alpha = alpha + def __init__(self, mu, alpha, ndim=None, size=None, dtype=None, *args, **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) self.mode = tt.floor(mu).astype('int32') + self.dist_params = (self.mu, self.alpha) + + super(NegativeBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) g = generate_samples(stats.gamma.rvs, alpha, scale=mu / alpha, @@ -266,7 +276,7 @@ def logp(self, value): negbinom) -class Geometric(Discrete): +class Geometric(UnivariateDiscrete): R""" Geometric log-likelihood. @@ -286,11 +296,12 @@ class Geometric(Discrete): p : float Probability of success on an individual trial (0 < p <= 1). """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): + self.p = tt.as_tensor_variable(p) + self.mode = tt.as_tensor_variable(1) + self.dist_params = (self.p,) - def __init__(self, p, *args, **kwargs): - super(Geometric, self).__init__(*args, **kwargs) - self.p = p - self.mode = 1 + super(Geometric, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -304,7 +315,7 @@ def logp(self, value): 0 <= p, p <= 1, value >= 1) -class DiscreteUniform(Discrete): +class DiscreteUniform(UnivariateDiscrete): R""" Discrete uniform distribution. @@ -323,13 +334,14 @@ class DiscreteUniform(Discrete): upper : int Upper limit (upper > lower). """ - - def __init__(self, lower, upper, *args, **kwargs): - super(DiscreteUniform, self).__init__(*args, **kwargs) + def __init__(self, lower, upper, ndim=None, size=None, dtype=None, *args, **kwargs): self.lower = tt.floor(lower).astype('int32') self.upper = tt.floor(upper).astype('int32') self.mode = tt.maximum( tt.floor((upper - lower) / 2.).astype('int32'), self.lower) + self.dist_params = (self.lower, self.upper) + + super(DiscreteUniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -352,7 +364,7 @@ def logp(self, value): lower <= value, value <= upper) -class Categorical(Discrete): +class Categorical(UnivariateDiscrete): R""" Categorical log-likelihood. @@ -370,9 +382,8 @@ class Categorical(Discrete): p > 0 and the elements of p must sum to 1. They will be automatically rescaled otherwise. """ + def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - def __init__(self, p, *args, **kwargs): - super(Categorical, self).__init__(*args, **kwargs) try: self.k = tt.shape(p)[-1].tag.test_value except AttributeError: @@ -381,6 +392,14 @@ def __init__(self, p, *args, **kwargs): self.p = (p.T / tt.sum(p, -1)).T self.mode = tt.argmax(p) + self.shape_support = tt.shape(self.p)[-1] + + self.dist_params = (self.p,) + # FIXME: The stated support is univariate? 
+ + super(Categorical, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + + def random(self, point=None, size=None, repeat=None): def random_choice(k, *args, **kwargs): if len(kwargs['p'].shape) > 1: @@ -393,10 +412,6 @@ def random_choice(k, *args, **kwargs): p, k = draw_values([self.p, self.k], point=point) return generate_samples(partial(random_choice, np.arange(k)), - p=p, - broadcast_shape=p.shape[:-1] or (1,), - dist_shape=self.shape, - size=size) def logp(self, value): p = self.p @@ -413,7 +428,7 @@ def logp(self, value): sumto1) -class Constant(Discrete): +class ConstantDist(UnivariateDiscrete): """ Constant log-likelihood. @@ -422,10 +437,10 @@ class Constant(Discrete): value : float or int Constant parameter. """ - - def __init__(self, c, *args, **kwargs): - super(Constant, self).__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.c = c + def __init__(self, c, ndim=None, size=None, dtype=None, *args, **kwargs): + self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) + self.dist_params = (self.c,) + super(ConstantDist, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -476,14 +491,16 @@ class ZeroInflatedPoisson(Discrete): Expected proportion of Poisson variates (0 < psi < 1) """ - - def __init__(self, theta, psi, *args, **kwargs): - super(ZeroInflatedPoisson, self).__init__(*args, **kwargs) - self.theta = theta - self.psi = psi + def __init__(self, theta, psi, ndim=None, size=None, dtype=None, *args, **kwargs): + self.theta = tt.as_tensor_variable(theta) + self.psi = tt.as_tensor_variable(psi) self.pois = Poisson.dist(theta) self.mode = self.pois.mode + self.dist_params = (self.theta, self.z) + + super(ZeroInflatedPoisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + def random(self, point=None, size=None, repeat=None): theta, psi = draw_values([self.theta, self.psi], point=point) g = generate_samples(stats.poisson.rvs, theta, @@ -497,7 +514,7 @@ def logp(self, value): tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta))) -class ZeroInflatedNegativeBinomial(Discrete): +class ZeroInflatedNegativeBinomial(UnivariateDiscrete): R""" Zero-Inflated Negative binomial log-likelihood. 
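The switch in ZeroInflatedPoisson.logp above encodes the usual zero-inflated mixture: with probability psi the count comes from Poisson(theta), otherwise an extra zero is produced. A small NumPy/SciPy check of the same density, independent of Theano:

import numpy as np
from scipy import stats

def zip_logp(value, theta, psi):
    # Mirrors ZeroInflatedPoisson.logp: psi-weighted Poisson mass for positive
    # counts, and (1 - psi) + psi * exp(-theta) for zeros.
    value = np.asarray(value)
    return np.where(value > 0,
                    np.log(psi) + stats.poisson.logpmf(value, theta),
                    np.log((1. - psi) + psi * np.exp(-theta)))

print(zip_logp([0, 1, 3], theta=2.0, psi=0.7))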
@@ -528,13 +545,14 @@ class ZeroInflatedNegativeBinomial(Discrete): Expected proportion of NegativeBinomial variates (0 < psi < 1) """ - def __init__(self, mu, alpha, psi, *args, **kwargs): - super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs) + def __init__(self, mu, alpha, psi, ndim=None, size=None, dtype=None, *args, **kwargs): self.mu = mu self.alpha = alpha self.psi = psi self.nb = NegativeBinomial.dist(mu, alpha) self.mode = self.nb.mode + self.dist_params = (mu, alpha, psi) + super(ZeroInflatedNegativeBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, alpha, psi = draw_values( diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index be89370c75..7393255588 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,14 +1,17 @@ import numpy as np import theano.tensor as tt from theano import function +from theano.tensor.raw_random import _infer_ndim_bcast from ..memoize import memoize from ..model import Model, get_named_nodes from ..vartypes import string_types -__all__ = ['DensityDist', 'Distribution', 'Continuous', - 'Discrete', 'NoDistribution', 'TensorType', 'draw_values'] +__all__ = ['DensityDist', 'Distribution', + 'Continuous', 'Discrete', 'NoDistribution', 'TensorType', + 'MultivariateContinuous', 'UnivariateContinuous', + 'MultivariateDiscrete', 'UnivariateDiscrete'] class _Unpickling(object): pass @@ -41,52 +44,101 @@ def dist(cls, *args, **kwargs): dist.__init__(*args, **kwargs) return dist - def __init__(self, ndim, size, dtype, testval=None, defaults=[], transform=None): - """ + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None, defaults=[], transform=None): + r""" + Distributions are specified in terms of the shape of their support, the shape + of the space of independent instances and the shape of the space of replications. + The "total" shape of the distribution is the concatenation of each of these + spaces, i.e. `dist.shape = shape_reps + shape_ind + shape_supp`. + + We're able to specify the number of dimensions for the + support of a concrete distribution type (e.g. scalar distributions have + `ndim_supp=0` and a multivariate normal has vector support, + so `ndim_supp=1`) and have the exact sizes of each dimension symbolic. + To actually instantiate a distribution, + we at least need a list/tuple/vector (`ndim_supp` in + length) containing symbolic scalars, `shape_supp`, representing the + exact size of each dimension. In the case that `shape_supp` has an + unknown length at the graph building stage (e.g. it is a generic Theano + vector or tensor), we must provide `ndim_supp`. + + The symbolic scalars `shape_supp` must either be required by a + distribution's constructor, or inferred through its required + parameters. Since most distributions are either scalar, or + have parameters within the space of their support (e.g. the + multivariate normal's mean parameter) inference can be + straight-forward. In the latter case, we refer to the parameters + as "informative". + + We also attempt to handle the specification of a collections of independent-- + but not identical instances of the base distribution (each with support as above). + These have a space with shape `shape_ind`. Generally, `shape_ind` will be + implicitly given by the distribution's parameters. 
For instance, + if a multivariate normal distribution is instantiated with a matrix mean + parameter `mu`, we can assume that each row specifies the mean for an + independent distribution. In this case the covariance parameter would either + have to be an `ndim=3` tensor, for which the last two dimensions specify each + covariance matrix, or a single matrix that is to apply to each independent + variate implied by the matrix `mu`. + + Here are a few ways of inferring shapes: + * When a distribution is scalar, then `shape_supp = ()` + * and has an informative parameter, e.g. `mu`, then `shape_ind = T.shape(mu)`. + * When a distribution is multivariate + * and has an informative parameter, e.g. `mu`, then + `shape_supp = T.shape(mu)[-ndim_supp:]` and `shape_ind = T.shape(mu)[-ndim_supp:]`. + In all remaining cases the shapes must be provided by the caller. + `shape_reps` is always provided by the caller. + + Parameters ---------- - ndim - Dimensions of the support for this distribution type. - size - Shape/dimensions of the space that will contain - independent draws under this distribution. + shape_supp + tuple + Shape of the support for this distribution type. + shape_ind + tuple + Dimension of independent, but not necessarily identical, copies of + this distribution type. + shape_reps + tuple + Dimension of independent and identical copies of + this distribution type. + bcast + A tuple of boolean values. + Broadcast dimensions. dtype - The Theano data type. + The data type. testval Test value to be added to the Theano variable's tag.test_value. defaults List of string names for attributes that can be used as this distribution's default value. transform - A random variable transform to be applied? + A transform function """ + + self.shape_supp = shape_supp + self.shape_ind = shape_ind + self.shape_reps = shape_reps + self.shape = shape_supp + shape_ind + shape_reps + # # Following Theano Op's handling of shape parameters (well, at least - # theano.tensor.raw_random.RandomFunction). + # theano.tensor.raw_random.RandomFunction.make_node). 
# - #support_shape_ = T.as_tensor_variable(, ndim=1) - #if support_shape == (): - # self.support_shape = support_shape_.astype('int64') - #else: - # self.support_shape = support_shape_ - - #if self.support_shape.type.ndim != 1 or \ - # not (self.support_shape.type.dtype == 'int64') and \ - # not (self.support_shape.type.dtype == 'int32'): + #if self.shape.type.ndim != 1 or \ + # not (self.shape.type.dtype == 'int64') and \ + # not (self.shape.type.dtype == 'int32'): # raise TypeError("Expected int elements in shape") - from theano.tensor.raw_random import _infer_ndim_bcast - import ipdb; ipdb.set_trace() # XXX BREAKPOINT - - ndim, size, bcast = _infer_ndim_bcast(ndim, size, testval) - - self.ndim = ndim - self.size = size - self.dtype = dtype + #self.ndim = self.shape.ndim + #self.dtype = dtype + #self.bcast = bcast self.testval = testval self.defaults = defaults self.transform = transform - self.type = T.TensorType(str(self.dtype), bcast) + self.type = T.TensorType(str(dtype), bcast) def default(self): return self.get_test_val(self.testval, self.defaults) @@ -151,25 +203,48 @@ def __init__(self, ndim, size=(), dtype='int64', defaults=['mode'], *args, **kwa class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, ndim, size=(), dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): - super(Continuous, self).__init__( - ndim, size, dtype, defaults=defaults, *args, **kwargs) + def __init__(self, ndim_support, ndim, size, bcast, dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): + super(Continuous, self).__init__(ndim_support, ndim, size, + bcast, dtype, defaults=defaults, *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, ndim, size=(), dtype='float64', testval=0, *args, **kwargs): + def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', *args, **kwargs): super(DensityDist, self).__init__( - ndim, size, dtype, testval, *args, **kwargs) + ndim, size, bcast, dtype, *args, **kwargs) self.logp = logp +class UnivariateContinuous(Continuous): + + def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *args, **kwargs): + """ + + Parameters + ========== + + dist_params + list or tuple of parameters to use for shape inference + + """ + + self.shape_supp = () + dist_params = tuple(T.as_tensor_variable(x) for x in dist_params) + + ndim, size, bcast = _infer_ndim_bcast(*(ndim, size) + dist_params) + if dtype is None: + dtype = T.scal.upcast(*(T.config.floatX,) + tuple(x.dtype for x in dist_params)) + + # We just assume + super(UnivariateContinuous, self).__init__( + tuple(), tuple(size), size, bcast, *args, **kwargs) + + class MultivariateContinuous(Continuous): - def __init__(self, ndim, size=(), *args, **kwargs): - super(MultivariateContinuous, self).__init__( - ndim, size, *args, **kwargs) + pass @@ -178,6 +253,15 @@ class MultivariateDiscrete(Discrete): pass +class UnivariateDiscrete(Discrete): + + def __init__(self, ndim, size, bcast, *args, **kwargs): + self.shape_supp = () + + super(UnivariateDiscrete, self).__init__( + 0, ndim, size, bcast, *args, **kwargs) + + def draw_values(params, point=None): """ Draw (fix) parameter values. 
Handles a number of cases: diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index f5bc060045..a45f6425a8 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -12,9 +12,9 @@ import pymc3 as pm from . import transforms -from .distribution import Continuous, Discrete, draw_values, generate_samples from ..model import Deterministic from .continuous import ChiSquared, Normal +from .distribution import MultivariateContinuous, MultivariateDiscrete, draw_values, generate_samples from .special import gammaln, multigammaln from .dist_math import bound, logpow, factln @@ -58,7 +58,7 @@ def get_tau_cov(mu, tau=None, cov=None): return (tau, cov) -class MvNormal(Continuous): +class MvNormal(MultivariateContinuous): r""" Multivariate normal log-likelihood. @@ -82,17 +82,32 @@ class MvNormal(Continuous): Covariance matrix. tau : array, optional Precision matrix. - """ + ndim : int + TODO + size : tuple of ints + TODO - def __init__(self, mu, cov=None, tau=None, *args, **kwargs): - super(MvNormal, self).__init__(*args, **kwargs) + """ + def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): warnings.warn(('The calling signature of MvNormal() has changed. The new signature is:\n' 'MvNormal(name, mu, cov) instead of MvNormal(name, mu, tau).' 'You do not need to explicitly invert the covariance matrix anymore.'), DeprecationWarning) - self.mean = self.median = self.mode = self.mu = mu + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) self.tau, self.cov = get_tau_cov(mu, tau=tau, cov=cov) + self.dist_params = (self.mu, self.tau) + + # TODO: how do we want to use ndim and size? + shape_supp = self.mu[-1] + shape_ind = self.mu[:-1] + + bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) + if dtype is None: + dtype = self.mu.dtype + + super(MvNormal, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) + def random(self, point=None, size=None): mu, cov = draw_values([self.mu, self.cov], point=point) @@ -119,40 +134,8 @@ def logp(self, value): result += (delta.dot(tau) * delta).sum(axis=delta.ndim - 1) return -1 / 2. * result - #def logp(self, value): - # mu = tt.as_tensor_variable(self.mu) - # tau = tt.as_tensor_variable(self.tau) - - # reps_shape_T = tau.shape[:-2] - # reps_shape_prod = tt.prod(reps_shape_T, keepdims=True) - # dist_shape_T = mu.shape[-1:] - - # # collapse reps dimensions - # flat_supp_shape = tt.concatenate([reps_shape_prod, dist_shape_T]) - # mus_collapsed = mu.reshape(flat_supp_shape, ndim=2) - # taus_collapsed = tau.reshape(tt.concatenate([reps_shape_prod, - # dist_shape_T, dist_shape_T]), ndim=3) - # # force value to conform to reps_shape - # value_reshape = tt.ones_like(mu) * value - # values_collapsed = value_reshape.reshape(flat_supp_shape, ndim=2) - - # def single_logl(_mu, _tau, _value, k): - # delta = _value - _mu - # result = k * tt.log(2 * np.pi) + tt.log(det(_tau)) - # result += tt.square(delta.dot(_tau)).sum(axis=-1) - # return -result/2 - - # from theano import scan - # res, _ = scan(fn=single_logl - # , sequences=[mus_collapsed, taus_collapsed, values_collapsed] - # , non_sequences=[dist_shape_T] - # , strict=True - # ) - # return res.sum() - - -class MvStudentT(Continuous): +class MvStudentT(MultivariateContinuous): R""" Multivariate Student-T log-likelihood. @@ -178,15 +161,27 @@ class MvStudentT(Continuous): mu : array Vector of means. 
""" + def __init__(self, nu, Sigma, mu=None, ndim=None, size=None, dtype=None, *args, **kwargs): + self.nu = tt.as_tensor_variable(nu) + self.Sigma = tt.as_tensor_variable(Sigma) + + # TODO: do this correctly; what about replications? + shape_supp = self.Sigma.shape[0] + shape_ind = self.Sigma.shape[:-2] - def __init__(self, nu, Sigma, mu=None, *args, **kwargs): - super(MvStudentT, self).__init__(*args, **kwargs) - self.nu = nu - self.mu = np.zeros(Sigma.shape[0]) if mu is None else mu - self.Sigma = Sigma + self.mu = tt.as_tensor_variable(tt.zeros(shape_supp) if mu is None else mu) + self.dist_params = (self.nu, self.mu, self.Sigma) self.mean = self.median = self.mode = self.mu = mu + # FIXME: this isn't correct/ideal + bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) + if dtype is None: + dtype = self.mu.dtype + + super(MvStudentT, self).__init__(shape_supp, shape_ind, (), ndim, + bcast, dtype, *args, **kwargs) + def random(self, point=None, size=None): chi2 = np.random.chisquare mvn = np.random.multivariate_normal @@ -216,7 +211,7 @@ def logp(self, value): return log_pdf -class Dirichlet(Continuous): +class Dirichlet(MultivariateContinuous): R""" Dirichlet log-likelihood. @@ -245,20 +240,28 @@ class Dirichlet(Continuous): Only the first `k-1` elements of `x` are expected. Can be used as a parent of Multinomial and Categorical nevertheless. """ + def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, dtype=None, *args, **kwargs): + self.a = tt.as_tensor_variable(a) + self.mean = self.a / tt.sum(self.a) + self.mode = tt.switch(tt.all(self.a > 1), + (self.a - 1) / tt.sum(self.a - 1), + np.nan) - def __init__(self, a, transform=transforms.stick_breaking, - *args, **kwargs): - shape = a.shape[-1] - kwargs.setdefault("shape", shape) - super(Dirichlet, self).__init__(transform=transform, *args, **kwargs) - - self.k = shape - self.a = a - self.mean = a / tt.sum(a) + self.dist_params = (self.a,) self.mode = tt.switch(tt.all(a > 1), (a - 1) / tt.sum(a - 1), np.nan) + # TODO: do this correctly; what about replications? + shape_supp = self.a.shape[-1] + shape_ind = self.a.shape[:-1] + + # FIXME: this isn't correct/ideal + bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) + if dtype is None: + dtype = self.mu.dtype + + super(Dirichlet, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, transform=transform, *args, **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -267,12 +270,12 @@ def _random(a, size=None): return stats.dirichlet.rvs(a, None if size == a.shape else size) samples = generate_samples(_random, a, - dist_shape=self.shape, + dist_shape=self.shape_support, size=size) return samples def logp(self, value): - k = self.k + k = self.shape_supp a = self.a # only defined for sum(value) == 1 @@ -283,7 +286,7 @@ def logp(self, value): broadcast_conditions=False) -class Multinomial(Discrete): +class Multinomial(MultivariateDiscrete): R""" Multinomial log-likelihood. @@ -314,8 +317,7 @@ class Multinomial(Discrete): be non-negative and sum to 1 along the last axis. They will be automatically rescaled otherwise. 
""" - - def __init__(self, n, p, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): super(Multinomial, self).__init__(*args, **kwargs) p = p / tt.sum(p, axis=-1, keepdims=True) @@ -336,6 +338,21 @@ def __init__(self, n, p, *args, **kwargs): self.mean = self.n * self.p self.mode = tt.cast(tt.round(self.mean), 'int32') + self.dist_params = (self.n, self.p) + + # TODO: do this correctly; what about replications? + # check that n == len(p)? + shape_supp = tuple(self.n) + shape_ind = () + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.p.dtype + + # FIXME: n.shape == p.shape? bcast, etc. + super(Multinomial, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) + def _random(self, n, p, size=None): if size == p.shape: size = None @@ -413,7 +430,7 @@ def __str__(self): matrix_pos_def = PosDefMatrix() -class Wishart(Continuous): +class Wishart(MultivariateContinuous): R""" Wishart log-likelihood. @@ -450,8 +467,7 @@ class Wishart(Continuous): use WishartBartlett or LKJCorr. """ - def __init__(self, n, V, *args, **kwargs): - super(Wishart, self).__init__(*args, **kwargs) + def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -459,17 +475,30 @@ def __init__(self, n, V, *args, **kwargs): 'For more information on the issues surrounding the ' 'Wishart see here: https://github.com/pymc-devs/pymc3/issues/538.', UserWarning) - self.n = n - self.p = p = V.shape[0] - self.V = V + # TODO: allow tensor of independent n's. + self.n = tt.as_tensor_variable(n, ndim=0) + self.V = tt.as_tensor_variable(V) self.mean = n * V - self.mode = tt.switch(1 * (n >= p + 1), - (n - p - 1) * V, - np.nan) + self.mode = tt.switch(1 * (n >= self.shape_support + 1), + (n - self.shape_support - 1) * V, + np.nan) + + self.dist_params = (self.n, self.V) + + # TODO: do this correctly; what about replications? + shape_supp = tuple(self.V.shape[-1]) + shape_ind = () + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.V.dtype + + super(Wishart, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) def logp(self, X): n = self.n - p = self.p + p = self.shape_support V = self.V IVI = det(V) @@ -564,7 +593,7 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) -class LKJCorr(Continuous): +class LKJCorr(MultivariateContinuous): R""" The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood. @@ -604,16 +633,30 @@ class LKJCorr(Continuous): 100(9), pp.1989-2001. """ - def __init__(self, n, p, *args, **kwargs): - self.n = n - self.p = p - n_elem = int(p * (p - 1) / 2) - self.mean = np.zeros(n_elem) - super(LKJCorr, self).__init__(shape=n_elem, *args, **kwargs) + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p, ndim=0) + n_elem = (p * (p - 1) / 2) + self.mean = tt.zeros(n_elem) + self.shape_support = n_elem + + # FIXME: triu, bcast, etc. 
+ self.tri_index = tt.zeros((p, p), dtype=int) + self.tri_index[tt.triu(p, k=1)] = tt.arange(n_elem) + self.tri_index[tt.triu(p, k=1)[::-1]] = tt.arange(n_elem) + + self.dist_params = (self.n, self.p) + + # TODO: do this correctly; what about replications? + shape_supp = tuple(self.p) + shape_ind = () + + # FIXME: this isn't correct/ideal + bcast = (False,) + if dtype is None: + dtype = self.n.dtype - self.tri_index = np.zeros([p, p], dtype=int) - self.tri_index[np.triu_indices(p, k=1)] = np.arange(n_elem) - self.tri_index[np.triu_indices(p, k=1)[::-1]] = np.arange(n_elem) + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) def _normalizing_constant(self, n, p): if n == 1: diff --git a/pymc3/model.py b/pymc3/model.py index fa02986b1c..bcff30b774 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -183,7 +183,6 @@ def __init__(self): self.potentials = [] self.missing_values = [] self.model = self - @property @memoize def bijection(self): @@ -243,6 +242,11 @@ def unobserved_RVs(self): """List of all random variable, including deterministic ones.""" return self.vars + self.deterministics + @property + def untransformed_RVs(self): + """All variables except those transformed for MCMC""" + return [v for v in self.vars if v not in self.hidden_RVs] + self.deterministics + @property def test_point(self): """Test point used to check that the model doesn't generate errors""" @@ -504,7 +508,8 @@ def __init__(self, type=None, owner=None, index=None, name=None, self.dsize = tt.prod(distribution.shape) self.distribution = distribution dist_test_val = distribution.default() - self.tag.test_value = np.ones(dist_test_val.shape, distribution.dtype) * dist_test_val + self.tag.test_value = np.ones( + dist_test_val.shape, distribution.dtype) * dist_test_val self.logp_elemwiset = distribution.logp(self) self.model = model From b1d4cee3dbe0d546bbf05d8349b635c7cae10d82 Mon Sep 17 00:00:00 2001 From: brandon willard Date: Sat, 21 May 2016 22:45:21 -0500 Subject: [PATCH 04/17] major fixes and small Theano random example comments --- pymc3/distributions/continuous.py | 11 +++-- pymc3/distributions/distribution.py | 42 ++++++++++-------- pymc3/distributions/multivariate.py | 68 ++++++++++++++++++----------- pymc3/model.py | 9 ++-- 4 files changed, 79 insertions(+), 51 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index db6975e8ea..a9a752dd40 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -228,6 +228,9 @@ def __init__(self, *args, **kwargs): sd = kwargs.pop('sd', None) tau = kwargs.pop('tau', None) + def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + + mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.variance = 1. / self.tau @@ -677,7 +680,7 @@ class StudentT(UnivariateContinuous): Describes a normal variable whose precision is gamma distributed. If only nu parameter is passed, this specifies a standard (central) - Student's tt. + Student's T. .. math:: @@ -1012,9 +1015,9 @@ class InverseGamma(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kwargs): - self.alpha = alpha - self.beta = beta - self.mean = self._calculate_mean() + self.alpha = tt.as_tensor_variable(alpha) + self.beta = tt.as_tensor_variable(beta) + self.mean = (alpha > 1) * beta / (alpha - 1.) 
or np.inf self.mode = beta / (alpha + 1.) self.variance = tt.switch(tt.gt(alpha, 2), (beta**2) / (alpha * (alpha - 1.)**2), diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 7393255588..51249f5642 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -83,10 +83,10 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None Here are a few ways of inferring shapes: * When a distribution is scalar, then `shape_supp = ()` - * and has an informative parameter, e.g. `mu`, then `shape_ind = T.shape(mu)`. + * and has an informative parameter, e.g. `mu`, then `shape_ind = tt.shape(mu)`. * When a distribution is multivariate * and has an informative parameter, e.g. `mu`, then - `shape_supp = T.shape(mu)[-ndim_supp:]` and `shape_ind = T.shape(mu)[-ndim_supp:]`. + `shape_supp = tt.shape(mu)[-ndim_supp:]` and `shape_ind = tt.shape(mu)[-ndim_supp:]`. In all remaining cases the shapes must be provided by the caller. `shape_reps` is always provided by the caller. @@ -118,10 +118,10 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None A transform function """ - self.shape_supp = shape_supp - self.shape_ind = shape_ind - self.shape_reps = shape_reps - self.shape = shape_supp + shape_ind + shape_reps + self.shape_supp = tt.cast(tt.as_tensor_variable(shape_supp, ndim=1), 'int64') + self.shape_ind = tt.cast(tt.as_tensor_variable(shape_ind, ndim=1), 'int64') + self.shape_reps = tt.cast(tt.as_tensor_variable(shape_reps, ndim=1), 'int64') + self.shape = tt.concatenate((self.shape_supp, self.shape_ind, self.shape_reps)) # # Following Theano Op's handling of shape parameters (well, at least @@ -138,7 +138,7 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None self.testval = testval self.defaults = defaults self.transform = transform - self.type = T.TensorType(str(dtype), bcast) + self.type = tt.TensorType(str(dtype), bcast) def default(self): return self.get_test_val(self.testval, self.defaults) @@ -146,21 +146,26 @@ def default(self): def get_test_val(self, val, defaults): if val is None: for v in defaults: - if hasattr(self, v) and np.all(np.isfinite(self.getattr_value(v))): + the_attr = getattr(self, v, None) + + if the_attr is not None and np.all(np.isfinite(self.getattr_value(v))): return self.getattr_value(v) else: return self.getattr_value(val) - if val is None: - raise AttributeError(str(self) + " has no finite default value to use, checked: " + - str(defaults) + " pass testval argument or adjust so value is finite.") + raise AttributeError(str(self) + " has no finite default value to use, checked: " + + str(defaults) + " pass testval argument or adjust so value is finite.") def getattr_value(self, val): if isinstance(val, string_types): val = getattr(self, val) - if isinstance(val, tt.TensorVariable): + if isinstance(val, tt.sharedvar.SharedVariable): + return val.get_value() + elif isinstance(val, tt.TensorVariable): return val.tag.test_value + elif isinstance(val, tt.TensorConstant): + return val.value if isinstance(val, tt.TensorConstant): return val.value @@ -203,9 +208,10 @@ def __init__(self, ndim, size=(), dtype='int64', defaults=['mode'], *args, **kwa class Continuous(Distribution): """Base class for continuous distributions""" - def __init__(self, ndim_support, ndim, size, bcast, dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): - super(Continuous, self).__init__(ndim_support, ndim, size, - bcast, dtype, 
defaults=defaults, *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, + dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): + super(Continuous, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, *args, **kwargs) class DensityDist(Distribution): @@ -231,15 +237,15 @@ def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *a """ self.shape_supp = () - dist_params = tuple(T.as_tensor_variable(x) for x in dist_params) + dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) ndim, size, bcast = _infer_ndim_bcast(*(ndim, size) + dist_params) if dtype is None: - dtype = T.scal.upcast(*(T.config.floatX,) + tuple(x.dtype for x in dist_params)) + dtype = tt.scal.upcast(*(tt.config.floatX,) + tuple(x.dtype for x in dist_params)) # We just assume super(UnivariateContinuous, self).__init__( - tuple(), tuple(size), size, bcast, *args, **kwargs) + tuple(), tuple(), size, bcast, *args, **kwargs) class MultivariateContinuous(Continuous): diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index a45f6425a8..abf8be447c 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -99,14 +99,19 @@ def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.mu, self.tau) # TODO: how do we want to use ndim and size? - shape_supp = self.mu[-1] - shape_ind = self.mu[:-1] + shape_supp = self.mu.shape[-1] + shape_ind = self.mu.shape[:-1] + + if self.mu.ndim > 0: + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + else: + bcast = (False,) - bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) if dtype is None: dtype = self.mu.dtype - super(MvNormal, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) + super(MvNormal, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def random(self, point=None, size=None): mu, cov = draw_values([self.mu, self.cov], point=point) @@ -257,11 +262,17 @@ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, shape_ind = self.a.shape[:-1] # FIXME: this isn't correct/ideal - bcast = (False,) * (shape_supp.ndim + shape_ind.ndim) + if self.a.ndim > 0: + bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + else: + bcast = (False,) + if dtype is None: dtype = self.mu.dtype - super(Dirichlet, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, transform=transform, *args, **kwargs) + super(Dirichlet, self).__init__(shape_supp, shape_ind, (), bcast, + dtype, transform=transform, *args, + **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -270,7 +281,7 @@ def _random(a, size=None): return stats.dirichlet.rvs(a, None if size == a.shape else size) samples = generate_samples(_random, a, - dist_shape=self.shape_support, + dist_shape=self.shape_supp, size=size) return samples @@ -318,9 +329,9 @@ class Multinomial(MultivariateDiscrete): rescaled otherwise. 
""" def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): - super(Multinomial, self).__init__(*args, **kwargs) - - p = p / tt.sum(p, axis=-1, keepdims=True) + self.n = tt.as_tensor_variable(n, ndim=0) + self.p = tt.as_tensor_variable(p) + self.p = self.p / tt.sum(self.p, axis=-1, keepdims=True) if len(self.shape) == 2: try: @@ -342,7 +353,7 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): # TODO: do this correctly; what about replications? # check that n == len(p)? - shape_supp = tuple(self.n) + shape_supp = self.n shape_ind = () # FIXME: this isn't correct/ideal @@ -351,7 +362,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): dtype = self.p.dtype # FIXME: n.shape == p.shape? bcast, etc. - super(Multinomial, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) + super(Multinomial, self).__init__(shape_supp, shape_ind, (), bcast, + dtype, *args, **kwargs) def _random(self, n, p, size=None): if size == p.shape: @@ -478,27 +490,29 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): # TODO: allow tensor of independent n's. self.n = tt.as_tensor_variable(n, ndim=0) self.V = tt.as_tensor_variable(V) - self.mean = n * V - self.mode = tt.switch(1 * (n >= self.shape_support + 1), - (n - self.shape_support - 1) * V, - np.nan) self.dist_params = (self.n, self.V) # TODO: do this correctly; what about replications? - shape_supp = tuple(self.V.shape[-1]) + shape_supp = self.V.shape[-1] shape_ind = () + self.mode = tt.switch(1 * (self.n >= shape_supp + 1), (self.n - + shape_supp - 1) + * self.V, np.nan) + self.mean = self.n * self.V + # FIXME: this isn't correct/ideal bcast = (False,) if dtype is None: dtype = self.V.dtype - super(Wishart, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) + super(Wishart, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def logp(self, X): n = self.n - p = self.shape_support + p = self.shape_supp V = self.V IVI = det(V) @@ -634,21 +648,22 @@ class LKJCorr(MultivariateContinuous): """ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + # TODO: allow tensor of independent n's. self.n = tt.as_tensor_variable(n, ndim=0) self.p = tt.as_tensor_variable(p, ndim=0) - n_elem = (p * (p - 1) / 2) + + n_elem = (self.p * (self.p - 1) / 2) + shape_supp = n_elem self.mean = tt.zeros(n_elem) - self.shape_support = n_elem # FIXME: triu, bcast, etc. - self.tri_index = tt.zeros((p, p), dtype=int) - self.tri_index[tt.triu(p, k=1)] = tt.arange(n_elem) - self.tri_index[tt.triu(p, k=1)[::-1]] = tt.arange(n_elem) + self.tri_index = tt.zeros((self.p, self.p), dtype=int) + self.tri_index[tt.triu(self.p, k=1)] = tt.arange(n_elem) + self.tri_index[tt.triu(self.p, k=1)[::-1]] = tt.arange(n_elem) self.dist_params = (self.n, self.p) # TODO: do this correctly; what about replications? 
- shape_supp = tuple(self.p) shape_ind = () # FIXME: this isn't correct/ideal @@ -656,7 +671,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): if dtype is None: dtype = self.n.dtype - super(LKJCorr, self).__init__(shape_supp, shape_ind, (), ndim, bcast, dtype, *args, **kwargs) + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + *args, **kwargs) def _normalizing_constant(self, n, p): if n == 1: diff --git a/pymc3/model.py b/pymc3/model.py index bcff30b774..88109002c9 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -507,9 +507,12 @@ def __init__(self, type=None, owner=None, index=None, name=None, self.dshape = distribution.shape self.dsize = tt.prod(distribution.shape) self.distribution = distribution - dist_test_val = distribution.default() - self.tag.test_value = np.ones( - dist_test_val.shape, distribution.dtype) * dist_test_val + # TODO: why should we do this? shouldn't the default/test_val + # of the distribution be enough? + #dist_test_val = distribution.default() + #self.tag.test_value = np.ones( + # distribution.shape.tag.test_value, distribution.dtype) * dist_test_val + self.tag.test_value = distribution.default() self.logp_elemwiset = distribution.logp(self) self.model = model From 6b47c6486bc7070188188d8ee967212384ddc634 Mon Sep 17 00:00:00 2001 From: brandon willard Date: Mon, 30 May 2016 15:18:54 -0500 Subject: [PATCH 05/17] fixed (some) handling of symbolic scalar shapes --- pymc3/distributions/distribution.py | 69 +++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 51249f5642..8a5a14f347 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -16,6 +16,32 @@ class _Unpickling(object): pass +def _as_tensor_shape_variable(var): + """ Just a collection of useful shape stuff from + `_infer_ndim_bcast` """ + + if var is None: + return tt.constant([], dtype='int64') + + res = var + if isinstance(res, (tuple, list)): + if len(res) == 0: + return tt.constant([], dtype='int64') + res = tt.as_tensor_variable(res, ndim=1) + + else: + if res.ndim != 1: + raise TypeError("shape must be a vector or list of scalar, got\ + '%s'" % res) + + if (not (res.dtype.startswith('int') or + res.dtype.startswith('uint'))): + + raise TypeError('shape must be an integer vector or list', + res.dtype) + return res + + class Distribution(object): """Statistical distribution""" def __new__(cls, name, *args, **kwargs): @@ -44,7 +70,8 @@ def dist(cls, *args, **kwargs): dist.__init__(*args, **kwargs) return dist - def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None, defaults=[], transform=None): + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=None, transform=None): r""" Distributions are specified in terms of the shape of their support, the shape of the space of independent instances and the shape of the space of replications. 
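        For example (an illustration of the convention only): a multivariate
        normal whose mean `mu` has shape (4, 3) has a support of dimension 3,
        so

            shape_supp = tt.shape(mu)[-1:]    # (3,)  support of each variate
            shape_ind = tt.shape(mu)[:-1]     # (4,)  independent variates
            shape_reps = (2,)                 # two replications requested

        and the resulting variable has shape
        `shape_reps + shape_ind + shape_supp`, i.e. (2, 4, 3).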
@@ -118,23 +145,29 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None A transform function """ - self.shape_supp = tt.cast(tt.as_tensor_variable(shape_supp, ndim=1), 'int64') - self.shape_ind = tt.cast(tt.as_tensor_variable(shape_ind, ndim=1), 'int64') - self.shape_reps = tt.cast(tt.as_tensor_variable(shape_reps, ndim=1), 'int64') - self.shape = tt.concatenate((self.shape_supp, self.shape_ind, self.shape_reps)) - - # - # Following Theano Op's handling of shape parameters (well, at least - # theano.tensor.raw_random.RandomFunction.make_node). - # - #if self.shape.type.ndim != 1 or \ - # not (self.shape.type.dtype == 'int64') and \ - # not (self.shape.type.dtype == 'int32'): - # raise TypeError("Expected int elements in shape") - - #self.ndim = self.shape.ndim - #self.dtype = dtype - #self.bcast = bcast + self.shape_supp = _as_tensor_shape_variable(shape_supp) + self.ndim_supp = tt.get_vector_length(self.shape_supp) + self.shape_ind = _as_tensor_shape_variable(shape_ind) + self.ndim_ind = tt.get_vector_length(self.shape_ind) + self.shape_reps = _as_tensor_shape_variable(shape_reps) + self.ndim_reps = tt.get_vector_length(self.shape_reps) + + ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps + if ndim_sum == 0: + self.shape = tt.constant([], dtype='int64') + else: + self.shape = tuple(self.shape_reps) +\ + tuple(self.shape_ind) +\ + tuple(self.shape_supp) + + if testval is None: + if ndim_sum == 0: + testval = tt.constant(0, dtype=dtype) + else: + testval = tt.zeros(self.shape) + + self.ndim = tt.get_vector_length(self.shape) + self.testval = testval self.defaults = defaults self.transform = transform From 69c6ffe2316310863d7c1cdc12d4719612086319 Mon Sep 17 00:00:00 2001 From: brandon willard Date: Sat, 21 May 2016 19:23:00 -0500 Subject: [PATCH 06/17] initial distribution shape refactoring --- pymc3/distributions/continuous.py | 148 ++++++++++++++++------------ pymc3/distributions/discrete.py | 36 ++++--- pymc3/distributions/distribution.py | 37 ++++--- pymc3/distributions/multivariate.py | 46 +++++---- pymc3/model.py | 7 +- 5 files changed, 163 insertions(+), 111 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index a9a752dd40..cd99e18606 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -28,7 +28,8 @@ class PositiveUnivariateContinuous(UnivariateContinuous): def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.log) - super(PositiveUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + super(PositiveUnivariateContinuous, self).__init__(transform=transform, + *args, **kwargs) class UnitUnivariateContinuous(UnivariateContinuous): @@ -36,7 +37,8 @@ class UnitUnivariateContinuous(UnivariateContinuous): def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.logodds) - super(UnitUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + super(UnitUnivariateContinuous, self).__init__(transform=transform, + *args, **kwargs) def assert_negative_support(var, label, distname, value=-1e-6): # Checks for evidence of positive support for a variable @@ -123,7 +125,8 @@ class Uniform(UnivariateContinuous): Upper limit. 
""" - def __init__(self, lower=0, upper=1, transform='interval', size=None, ndim=None, dtype=None, *args, **kwargs): + def __init__(self, lower=0, upper=1, transform='interval', size=None, + ndim=None, dtype=None, *args, **kwargs): lower = tt.as_tensor_variable(lower) upper = tt.as_tensor_variable(upper) @@ -133,7 +136,8 @@ def __init__(self, lower=0, upper=1, transform='interval', size=None, ndim=None, self.mean = (upper + lower) / 2. self.median = self.mean - super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -164,7 +168,8 @@ def __init__(self, ndim=None, size=None, dtype=None, *args, **kwargs): self.median = tt.as_tensor_variable(0.) self.dist_params = (self.median,) - super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') @@ -228,7 +233,8 @@ def __init__(self, *args, **kwargs): sd = kwargs.pop('sd', None) tau = kwargs.pop('tau', None) - def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu @@ -240,7 +246,8 @@ def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, dtype=None, self.dist_params = (self.mu, self.tau) - super(Normal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Normal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -280,7 +287,9 @@ class HalfNormal(PositiveUnivariateContinuous): tau : float Precision (tau > 0). """ - def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, + *args, **kwargs): + self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. - 2 / np.pi) / self.tau @@ -290,7 +299,8 @@ def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, *args, * self.dist_params = (self.tau,) - super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): sd = draw_values([self.sd], point=point) @@ -362,7 +372,8 @@ class Wald(PositiveUnivariateContinuous): The American Statistician, Vol. 30, No. 2, pp. 
88-90 """ - def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, + size=None, dtype=None, *args, **kwargs): self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha @@ -377,7 +388,8 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, size=None, self.dist_params = (self.mu, self.lam, self.alpha) - super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_mu_lam_phi(self, mu, lam, phi): res = None @@ -421,13 +433,12 @@ def logp(self, value): lam = self.lam alpha = self.alpha # value *must* be iid. Otherwise this is wrong. - return bound(logpow(lam / (2. * np.pi), 0.5) - - logpow(value - alpha, 1.5) - - (0.5 * lam / (value - alpha) - * ((value - alpha - mu) / mu)**2), + return bound(logpow(lam / (2. * np.pi), 0.5) - + logpow(value - alpha, 1.5) - + (0.5 * lam / (value - alpha) * ((value - alpha - mu) / + mu)**2), # XXX these two are redundant. Please, check. - value > 0, value - alpha > 0, - mu > 0, lam > 0, alpha >= 0) + value > 0, value - alpha > 0, mu > 0, lam > 0, alpha >= 0) class Beta(UnitUnivariateContinuous): @@ -473,7 +484,8 @@ class Beta(UnitUnivariateContinuous): the binomial distribution. """ - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -486,7 +498,8 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None self.dist_params = (self.alpha, self.beta) - super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -512,9 +525,8 @@ def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - - betaln(alpha, beta), - value >= 0, value <= 1, + return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1) - + betaln(alpha, beta), value >= 0, value <= 1, alpha > 0, beta > 0) @@ -548,8 +560,6 @@ def __init__(self, lam, ndim=None, size=None, dtype=None, *args, **kwargs): self.variance = lam**-2 - assert_negative_support(lam, 'lam', 'Exponential') - super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): @@ -596,7 +606,8 @@ def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.b, self.mu) - super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) @@ -639,21 +650,22 @@ class Lognormal(PositiveUnivariateContinuous): tau : float Scale parameter (tau > 0). """ - def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = mu - self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) - - self.mean = tt.exp(mu + 1. 
/ (2 * self.tau)) - self.median = tt.exp(mu) - self.mode = tt.exp(mu - 1. / self.tau) - self.variance = (tt.exp(1. / self.tau) - 1) * tt.exp(2 * mu + 1. / self.tau) + def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, + **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) + self.mean = tt.exp(self.mu + 1. / (2. * self.tau)) + self.median = tt.exp(self.mu) + self.mode = tt.exp(self.mu - 1. / self.tau) + self.variance = (tt.exp(1. / self.tau) - 1.) * tt.exp(2 * self.mu + 1. / self.tau) assert_negative_support(tau, 'tau', 'Lognormal') assert_negative_support(sd, 'sd', 'Lognormal') self.dist_params = (self.mu, self.tau) - super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) @@ -702,9 +714,8 @@ class StudentT(UnivariateContinuous): lam : float Scale parameter (lam > 0). """ - - def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, - size=None, dtype=None, *args, **kwargs): + def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, + dtype=None, *args, **kwargs): self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu @@ -767,11 +778,11 @@ class Pareto(PositiveUnivariateContinuous): def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): self.alpha = tt.as_tensor_variable(alpha) self.m = tt.as_tensor_variable(m) - self.mean = tt.switch(tt.gt(alpha, 1), alpha * m / (alpha - 1.), np.inf) - self.median = m * 2.**(1. / alpha) + self.mean = tt.switch(tt.gt(self.alpha, 1), self.alpha * self.m / (self.alpha - 1.), np.inf) + self.median = self.m * 2.**(1. / self.alpha) self.variance = tt.switch( - tt.gt(alpha, 2), - (alpha * m**2) / ((alpha - 2.) * (alpha - 1.)**2), + tt.gt(self.alpha, 2), + (self.alpha * self.m**2) / ((self.alpha - 2.) * (self.alpha - 1.)**2), np.inf) assert_negative_support(alpha, 'alpha', 'Pareto') @@ -779,7 +790,8 @@ def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.alpha, self.m) - super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -832,7 +844,8 @@ def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwarg self.dist_params = (self.alpha, self.beta) - super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) assert_negative_support(beta, 'beta', 'Cauchy') @@ -883,7 +896,8 @@ def __init__(self, beta, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.beta,) - super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) assert_negative_support(beta, 'beta', 'HalfCauchy') @@ -943,20 +957,22 @@ class Gamma(PositiveUnivariateContinuous): Alternative scale parameter (sd > 0). 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, + size=None, dtype=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta - self.mean = alpha / beta - self.mode = tt.maximum((alpha - 1) / beta, 0) - self.variance = alpha / beta**2 + self.mean = self.alpha / self.beta + self.mode = tt.maximum((self.alpha - 1) / self.beta, 0) + self.variance = self.alpha / self.beta**2 assert_negative_support(alpha, 'alpha', 'Gamma') assert_negative_support(beta, 'beta', 'Gamma') self.dist_params = (self.alpha, self.beta) - super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, + **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -1025,6 +1041,8 @@ def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kw assert_negative_support(alpha, 'alpha', 'InverseGamma') assert_negative_support(beta, 'beta', 'InverseGamma') + self.dist_params = (self.alpha, self.beta) + super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) def _calculate_mean(self): @@ -1035,9 +1053,6 @@ def _calculate_mean(self): m[self.alpha <= 1] = np.inf return m - self.dist_params = (self.alpha, self.beta) - - def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], point=point) @@ -1048,8 +1063,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): alpha = self.alpha beta = self.beta - return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value - + logpow(value, -alpha - 1), + return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value + + logpow(value, -alpha - 1), value > 0, alpha > 0, beta > 0) @@ -1075,7 +1090,9 @@ class ChiSquared(Gamma): def __init__(self, nu, *args, **kwargs): self.nu = tt.as_tensor_variable(nu) - super(ChiSquared, self).__init__(alpha=self.nu / 2., beta=tt.as_tensor_variable(0.5), *args, **kwargs) + super(ChiSquared, self).__init__(alpha=self.nu / 2., + beta=tt.as_tensor_variable(0.5), + *args, **kwargs) class Weibull(PositiveUnivariateContinuous): @@ -1114,7 +1131,8 @@ def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwarg self.dist_params = (self.alpha, self.beta) - super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -1301,15 +1319,16 @@ def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, **kwa self.mu = tt.as_tensor_variable(mu) self.sigma = tt.as_tensor_variable(sigma) self.nu = tt.as_tensor_variable(nu) - self.mean = mu + nu - self.variance = (sigma**2) + (nu**2) + self.mean = self.mu + self.nu + self.variance = self.sigma**2 + self.nu**2 assert_negative_support(sigma, 'sigma', 'ExGaussian') assert_negative_support(nu, 'nu', 'ExGaussian') self.dist_params = (self.mu, self.sigma, self.nu) - super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, sigma, nu = 
draw_values([self.mu, self.sigma, self.nu], @@ -1361,15 +1380,16 @@ class VonMises(UnivariateContinuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, size=None, dtype=None, *args, **kwargs): - self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( - mu) + def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, + size=None, dtype=None, *args, **kwargs): + self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) self.kappa = tt.as_tensor_variable(kappa) - self.variance = 1. - i1(kappa) / i0(kappa) + self.variance = 1. - i1(self.kappa) / i0(self.kappa) self.dist_params = (self.mu, self.kappa) - super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, + *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index cf0a56f597..bb7ed4125f 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -44,7 +44,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.n, self.p) - super(Binomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Binomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) @@ -90,6 +91,7 @@ class BetaBinomial(UnivariateDiscrete): beta : float beta > 0. """ + def __init__(self, alpha, beta, n, ndim=None, size=None, dtype=None, *args, **kwargs): self.alpha = tt.as_tensor_variable(alpha) self.beta = tt.as_tensor_variable(beta) @@ -98,7 +100,8 @@ def __init__(self, alpha, beta, n, ndim=None, size=None, dtype=None, *args, **kw self.dist_params = (self.alpha, self.beta, self.n) - super(BetaBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(BetaBinomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -154,7 +157,8 @@ def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.p,) # FIXME: bcast, etc. 
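        # Illustrative note (a sketch of what the Univariate base class is
        # expected to do with `dist_params`, mirroring
        # UnivariateContinuous.__init__ in this patch): the broadcast pattern
        # is inferred from the parameters themselves, roughly
        #
        #     dist_params = tuple(tt.as_tensor_variable(x) for x in (p,))
        #     ndim, size, bcast = _infer_ndim_bcast(*(ndim, size) + dist_params)
        #
        # so a vector-valued `p` gives a vector-valued Bernoulli without an
        # explicit shape argument; the FIXME above concerns trusting that
        # inference.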
- super(Bernoulli, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Bernoulli, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -202,7 +206,8 @@ def __init__(self, mu, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.mu,) - super(Poisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Poisson, self).__init__(self.dist_params, + ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) @@ -252,7 +257,8 @@ def __init__(self, mu, alpha, ndim=None, size=None, dtype=None, *args, **kwargs) self.dist_params = (self.mu, self.alpha) - super(NegativeBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(NegativeBinomial, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) @@ -301,7 +307,8 @@ def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.mode = tt.as_tensor_variable(1) self.dist_params = (self.p,) - super(Geometric, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Geometric, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -334,6 +341,7 @@ class DiscreteUniform(UnivariateDiscrete): upper : int Upper limit (upper > lower). """ + def __init__(self, lower, upper, ndim=None, size=None, dtype=None, *args, **kwargs): self.lower = tt.floor(lower).astype('int32') self.upper = tt.floor(upper).astype('int32') @@ -341,7 +349,8 @@ def __init__(self, lower, upper, ndim=None, size=None, dtype=None, *args, **kwar tt.floor((upper - lower) / 2.).astype('int32'), self.lower) self.dist_params = (self.lower, self.upper) - super(DiscreteUniform, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(DiscreteUniform, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -397,8 +406,8 @@ def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.p,) # FIXME: The stated support is univariate? - super(Categorical, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) - + super(Categorical, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): def random_choice(k, *args, **kwargs): @@ -437,10 +446,12 @@ class ConstantDist(UnivariateDiscrete): value : float or int Constant parameter. 
""" + def __init__(self, c, ndim=None, size=None, dtype=None, *args, **kwargs): self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) self.dist_params = (self.c,) - super(ConstantDist, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ConstantDist, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -497,9 +508,10 @@ def __init__(self, theta, psi, ndim=None, size=None, dtype=None, *args, **kwargs self.pois = Poisson.dist(theta) self.mode = self.pois.mode - self.dist_params = (self.theta, self.z) + self.dist_params = (self.theta, self.psi) - super(ZeroInflatedPoisson, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ZeroInflatedPoisson, self).__init__( + self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): theta, psi = draw_values([self.theta, self.psi], point=point) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 8a5a14f347..3b9f6470b1 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -212,10 +212,14 @@ def TensorType(dtype, shape): class NoDistribution(Distribution): - def __init__(self, shape, dtype, testval=None, defaults=[], transform=None, parent_dist=None, *args, **kwargs): - super(NoDistribution, self).__init__(shape=shape, dtype=dtype, - testval=testval, defaults=defaults, - *args, **kwargs) + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, + testval=None, defaults=[], transform=None, parent_dist=None, + *args, **kwargs): + super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype=dtype, + testval=testval, + defaults=defaults, *args, + **kwargs) self.parent_dist = parent_dist def __getattr__(self, name): @@ -230,35 +234,40 @@ def logp(self, x): class Discrete(Distribution): """Base class for discrete distributions""" - - def __init__(self, ndim, size=(), dtype='int64', defaults=['mode'], *args, **kwargs): + def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype='int64', + defaults=['mode'], *args, **kwargs): if dtype != 'int64': raise TypeError('Discrete classes expect dtype to be int64.') - super(Discrete, self).__init__( - ndim, size, dtype, defaults=defaults, *args, **kwargs) + super(Discrete, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, defaults=defaults, *args, + **kwargs) class Continuous(Distribution): """Base class for continuous distributions""" def __init__(self, shape_supp, shape_ind, shape_reps, bcast, - dtype='float64', defaults=['median', 'mean', 'mode'], *args, **kwargs): + dtype='float64', defaults=['median', 'mean', 'mode'], *args, + **kwargs): super(Continuous, self).__init__(shape_supp, shape_ind, shape_reps, - bcast, dtype, defaults=defaults, *args, **kwargs) + bcast, dtype, defaults=defaults, + *args, **kwargs) class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', *args, **kwargs): - super(DensityDist, self).__init__( - ndim, size, bcast, dtype, *args, **kwargs) + def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', + *args, **kwargs): + super(DensityDist, self).__init__(ndim, size, bcast, dtype, *args, + **kwargs) self.logp = logp class UnivariateContinuous(Continuous): - def __init__(self, dist_params, ndim=None, 
size=None, dtype=None, bcast=None, *args, **kwargs): + def __init__(self, dist_params, ndim=None, size=None, dtype=None, + bcast=None, *args, **kwargs): """ Parameters diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index abf8be447c..107cb66ff9 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -88,17 +88,20 @@ class MvNormal(MultivariateContinuous): TODO """ - def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, + **kwargs): warnings.warn(('The calling signature of MvNormal() has changed. The new signature is:\n' 'MvNormal(name, mu, cov) instead of MvNormal(name, mu, tau).' 'You do not need to explicitly invert the covariance matrix anymore.'), DeprecationWarning) - self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) - self.tau, self.cov = get_tau_cov(mu, tau=tau, cov=cov) + # TODO: Need to apply replications/size. + self.mu = tt.as_tensor_variable(mu) + self.tau = tt.as_tensor_variable(tau) self.dist_params = (self.mu, self.tau) - # TODO: how do we want to use ndim and size? + self.median = self.mode = self.mean = self.mu + # TODO: How do we want to use ndim? shape_supp = self.mu.shape[-1] shape_ind = self.mu.shape[:-1] @@ -167,10 +170,11 @@ class MvStudentT(MultivariateContinuous): Vector of means. """ def __init__(self, nu, Sigma, mu=None, ndim=None, size=None, dtype=None, *args, **kwargs): + # TODO: Need to apply replications/size. self.nu = tt.as_tensor_variable(nu) self.Sigma = tt.as_tensor_variable(Sigma) - # TODO: do this correctly; what about replications? + # TODO: How do we want to use ndim? shape_supp = self.Sigma.shape[0] shape_ind = self.Sigma.shape[:-2] @@ -246,18 +250,16 @@ class Dirichlet(MultivariateContinuous): as a parent of Multinomial and Categorical nevertheless. """ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, dtype=None, *args, **kwargs): + # TODO: Need to apply replications/size. self.a = tt.as_tensor_variable(a) self.mean = self.a / tt.sum(self.a) self.mode = tt.switch(tt.all(self.a > 1), - (self.a - 1) / tt.sum(self.a - 1), - np.nan) + (self.a - 1) / tt.sum(self.a - 1), + np.nan) self.dist_params = (self.a,) - self.mode = tt.switch(tt.all(a > 1), - (a - 1) / tt.sum(a - 1), - np.nan) - # TODO: do this correctly; what about replications? + # TODO: How do we want to use ndim? shape_supp = self.a.shape[-1] shape_ind = self.a.shape[:-1] @@ -328,7 +330,9 @@ class Multinomial(MultivariateDiscrete): be non-negative and sum to 1 along the last axis. They will be automatically rescaled otherwise. """ - def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, + **kwargs): + # TODO: Need to apply replications/size. self.n = tt.as_tensor_variable(n, ndim=0) self.p = tt.as_tensor_variable(p) self.p = self.p / tt.sum(self.p, axis=-1, keepdims=True) @@ -351,8 +355,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.dist_params = (self.n, self.p) - # TODO: do this correctly; what about replications? - # check that n == len(p)? + # TODO: check that n == len(p)? + # TODO: How do we want to use ndim? shape_supp = self.n shape_ind = () @@ -479,7 +483,8 @@ class Wishart(MultivariateContinuous): use WishartBartlett or LKJCorr. 
""" - def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, + **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -487,13 +492,13 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): 'For more information on the issues surrounding the ' 'Wishart see here: https://github.com/pymc-devs/pymc3/issues/538.', UserWarning) - # TODO: allow tensor of independent n's. + # TODO: Need to apply replications/size. self.n = tt.as_tensor_variable(n, ndim=0) self.V = tt.as_tensor_variable(V) self.dist_params = (self.n, self.V) - # TODO: do this correctly; what about replications? + # TODO: How do we want to use ndim? shape_supp = self.V.shape[-1] shape_ind = () @@ -507,6 +512,10 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, **kwargs): if dtype is None: dtype = self.V.dtype + self.mode = tt.switch(1 * (self.n >= self.shape_supp + 1), + (self.n - self.shape_supp - 1) * self.V, + np.nan) + super(Wishart, self).__init__(shape_supp, shape_ind, (), bcast, dtype, *args, **kwargs) @@ -648,10 +657,11 @@ class LKJCorr(MultivariateContinuous): """ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): - # TODO: allow tensor of independent n's. + # TODO: Need to apply replications/size. self.n = tt.as_tensor_variable(n, ndim=0) self.p = tt.as_tensor_variable(p, ndim=0) + # TODO: How do we want to use ndim? n_elem = (self.p * (self.p - 1) / 2) shape_supp = n_elem self.mean = tt.zeros(n_elem) diff --git a/pymc3/model.py b/pymc3/model.py index 88109002c9..ad80cfe140 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -509,9 +509,10 @@ def __init__(self, type=None, owner=None, index=None, name=None, self.distribution = distribution # TODO: why should we do this? shouldn't the default/test_val # of the distribution be enough? 
- #dist_test_val = distribution.default() - #self.tag.test_value = np.ones( - # distribution.shape.tag.test_value, distribution.dtype) * dist_test_val + # dist_test_val = distribution.default() + # self.tag.test_value = np.ones( + # distribution.shape.tag.test_value, distribution.dtype) * + # dist_test_val self.tag.test_value = distribution.default() self.logp_elemwiset = distribution.logp(self) self.model = model From 9f9917879a6331ed25bf5cef2e0a23d5d8075cc3 Mon Sep 17 00:00:00 2001 From: brandon willard Date: Mon, 30 May 2016 15:18:54 -0500 Subject: [PATCH 07/17] fixed (some) handling of symbolic scalar shapes --- pymc3/distributions/distribution.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 3b9f6470b1..4ddcea4170 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -42,6 +42,32 @@ def _as_tensor_shape_variable(var): return res +def _as_tensor_shape_variable(var): + """ Just a collection of useful shape stuff from + `_infer_ndim_bcast` """ + + if var is None: + return T.constant([], dtype='int64') + + res = var + if isinstance(res, (tuple, list)): + if len(res) == 0: + return T.constant([], dtype='int64') + res = T.as_tensor_variable(res, ndim=1) + + else: + if res.ndim != 1: + raise TypeError("shape must be a vector or list of scalar, got\ + '%s'" % res) + + if (not (res.dtype.startswith('int') or + res.dtype.startswith('uint'))): + + raise TypeError('shape must be an integer vector or list', + res.dtype) + return res + + class Distribution(object): """Statistical distribution""" def __new__(cls, name, *args, **kwargs): From 9563d8a190fbc7a8b9c669892246edd909a907ed Mon Sep 17 00:00:00 2001 From: brandon willard Date: Tue, 31 May 2016 13:27:28 -0500 Subject: [PATCH 08/17] started splitting up dist. classes; some small testval test changes --- pymc3/distributions/continuous.py | 34 ++++++++------ pymc3/distributions/discrete.py | 23 +++++----- pymc3/distributions/distribution.py | 69 +++++++---------------------- pymc3/distributions/multivariate.py | 19 ++++---- pymc3/model.py | 30 +++++++++---- 5 files changed, 78 insertions(+), 97 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index cd99e18606..1a02d4b608 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -9,12 +9,13 @@ import numpy as np import theano.tensor as tt +from theano.gof.op import get_test_value from scipy import stats import warnings from . import transforms from .dist_math import bound, logpow, gammaln, betaln, std_cdf, i0, i1 -from .distribution import UnivariateContinuous, draw_values, generate_samples +from .distribution import Univariate, Continuous, draw_values, generate_samples __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace', 'StudentT', 'Cauchy', 'HalfCauchy', 'Gamma', 'Weibull', @@ -23,16 +24,24 @@ 'VonMises', 'SkewNormal'] -class PositiveUnivariateContinuous(UnivariateContinuous): +class PositiveUnivariateContinuous(Univariate, Continuous): """Base class for positive univariate continuous distributions""" def __init__(self, *args, **kwargs): transform = kwargs.get('transform', transforms.log) super(PositiveUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + # TODO: is there a better way to use Theano's + # existing `...tag.test_value` mechanics? 
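        # Illustrative sketch of the intent of the lines below (not new
        # behaviour): broadcast 0.5 to the distribution's symbolic shape and
        # let Theano's test-value machinery produce a concrete default,
        # roughly
        #
        #     val = tt.alloc(0.5, *shape)      # 0.5 broadcast to the full shape
        #     testval = get_test_value(val)    # concrete ndarray, provided test
        #                                      # values are available for `shape`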
+ ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps + if self.testval is None: + if ndim_sum == 0: + self.testval = 0.5 + else: + self.testval = get_test_value(tt.alloc(*((0.5,) + self.shape))) -class UnitUnivariateContinuous(UnivariateContinuous): +class UnitUnivariateContinuous(Univariate, Continuous): """Base class for univariate continuous distributions in [0,1]""" def __init__(self, *args, **kwargs): @@ -103,7 +112,7 @@ def get_tau_sd(tau=None, sd=None): return (tt.as_tensor_variable(tau), tt.as_tensor_variable(sd)) -class Uniform(UnivariateContinuous): +class Uniform(Univariate, Continuous): R""" Continuous uniform log-likelihood. @@ -157,7 +166,7 @@ def logp(self, value): value >= lower, value <= upper) -class Flat(UnivariateContinuous): +class Flat(Univariate, Continuous): """ Uninformative log-likelihood that returns 0 regardless of the passed value. @@ -178,7 +187,7 @@ def logp(self, value): return tt.zeros_like(value) -class Normal(UnivariateContinuous): +class Normal(Univariate, Continuous): R""" Univariate normal log-likelihood. @@ -573,7 +582,7 @@ def logp(self, value): return bound(tt.log(lam) - lam * value, value > 0, lam > 0) -class Laplace(UnivariateContinuous): +class Laplace(Univariate, Continuous): R""" Laplace log-likelihood. @@ -686,7 +695,7 @@ def logp(self, value): tau > 0) -class StudentT(UnivariateContinuous): +class StudentT(Univariate, Continuous): r""" Non-central Student's T log-likelihood. @@ -812,7 +821,7 @@ def logp(self, value): value >= m, alpha > 0, m > 0) -class Cauchy(UnivariateContinuous): +class Cauchy(Univariate, Continuous): R""" Cauchy log-likelihood. @@ -1154,7 +1163,7 @@ def logp(self, value): value >= 0, alpha > 0, beta > 0) -class Bounded(UnivariateContinuous): +class Bounded(Univariate, Continuous): R""" An upper, lower or upper+lower bounded distribution @@ -1269,8 +1278,7 @@ def StudentTpos(*args, **kwargs): HalfStudentT = Bound(StudentT, lower=0) - -class ExGaussian(UnivariateContinuous): +class ExGaussian(Univariate, Continuous): R""" Exponentially modified Gaussian log-likelihood. @@ -1356,7 +1364,7 @@ def logp(self, value): return bound(lp, sigma > 0., nu > 0.) -class VonMises(UnivariateContinuous): +class VonMises(Univariate, Continuous): R""" Univariate VonMises log-likelihood. diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index bb7ed4125f..7c877877de 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -6,7 +6,7 @@ from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow -from .distribution import UnivariateDiscrete, draw_values, generate_samples +from .distribution import Univariate, Discrete, draw_values, generate_samples __all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', 'ZeroInflatedPoisson', @@ -14,7 +14,7 @@ 'Categorical'] -class Binomial(UnivariateDiscrete): +class Binomial(Univariate, Discrete): R""" Binomial log-likelihood. @@ -63,7 +63,7 @@ def logp(self, value): 0 <= p, p <= 1) -class BetaBinomial(UnivariateDiscrete): +class BetaBinomial(Univariate, Discrete): R""" Beta-binomial log-likelihood. 
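(A usage-level sketch of the constructor signatures above: replications are requested through the `size` keyword. This is hypothetical model code that assumes this branch's `Univariate` base classes; it is not runnable against released pymc3.)

    import pymc3 as pm

    with pm.Model():
        # ten i.i.d. replications of scalar-parameter distributions
        x = pm.Normal('x', mu=0., sd=1., size=(10,))
        z = pm.Bernoulli('z', p=0.3, size=(10,))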
@@ -132,7 +132,7 @@ def logp(self, value): alpha > 0, beta > 0) -class Bernoulli(UnivariateDiscrete): +class Bernoulli(Univariate, Discrete): R"""Bernoulli log-likelihood The Bernoulli distribution describes the probability of successes @@ -174,7 +174,7 @@ def logp(self, value): p >= 0, p <= 1) -class Poisson(UnivariateDiscrete): +class Poisson(Univariate, Discrete): R""" Poisson log-likelihood. @@ -225,7 +225,7 @@ def logp(self, value): 0, log_prob) -class NegativeBinomial(UnivariateDiscrete): +class NegativeBinomial(Univariate, Discrete): R""" Negative binomial log-likelihood. @@ -282,7 +282,7 @@ def logp(self, value): negbinom) -class Geometric(UnivariateDiscrete): +class Geometric(Univariate, Discrete): R""" Geometric log-likelihood. @@ -322,7 +322,7 @@ def logp(self, value): 0 <= p, p <= 1, value >= 1) -class DiscreteUniform(UnivariateDiscrete): +class DiscreteUniform(Univariate, Discrete): R""" Discrete uniform distribution. @@ -373,7 +373,7 @@ def logp(self, value): lower <= value, value <= upper) -class Categorical(UnivariateDiscrete): +class Categorical(Univariate, Discrete): R""" Categorical log-likelihood. @@ -437,7 +437,7 @@ def logp(self, value): sumto1) -class ConstantDist(UnivariateDiscrete): +class ConstantDist(Univariate, Discrete): """ Constant log-likelihood. @@ -472,8 +472,7 @@ def ConstantDist(*args, **kwargs): DeprecationWarning) return Constant(*args, **kwargs) - -class ZeroInflatedPoisson(Discrete): +class ZeroInflatedPoisson(Univariate, Discrete): R""" Zero-inflated Poisson log-likelihood. diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 4ddcea4170..4e3406714d 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -42,32 +42,6 @@ def _as_tensor_shape_variable(var): return res -def _as_tensor_shape_variable(var): - """ Just a collection of useful shape stuff from - `_infer_ndim_bcast` """ - - if var is None: - return T.constant([], dtype='int64') - - res = var - if isinstance(res, (tuple, list)): - if len(res) == 0: - return T.constant([], dtype='int64') - res = T.as_tensor_variable(res, ndim=1) - - else: - if res.ndim != 1: - raise TypeError("shape must be a vector or list of scalar, got\ - '%s'" % res) - - if (not (res.dtype.startswith('int') or - res.dtype.startswith('uint'))): - - raise TypeError('shape must be an integer vector or list', - res.dtype) - return res - - class Distribution(object): """Statistical distribution""" def __new__(cls, name, *args, **kwargs): @@ -186,23 +160,20 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, tuple(self.shape_ind) +\ tuple(self.shape_supp) - if testval is None: - if ndim_sum == 0: - testval = tt.constant(0, dtype=dtype) - else: - testval = tt.zeros(self.shape) - self.ndim = tt.get_vector_length(self.shape) - - self.testval = testval self.defaults = defaults self.transform = transform + + if testval is None: + testval = self.get_test_value(defaults=self.defaults) + + self.testval = testval self.type = tt.TensorType(str(dtype), bcast) def default(self): - return self.get_test_val(self.testval, self.defaults) + return self.get_test_value(self.testval, self.defaults) - def get_test_val(self, val, defaults): + def get_test_value(self, val=None, defaults=None): if val is None: for v in defaults: the_attr = getattr(self, v, None) @@ -216,9 +187,14 @@ def get_test_val(self, val, defaults): str(defaults) + " pass testval argument or adjust so value is finite.") def getattr_value(self, val): + """ Attempts to obtain a 
non-symbolic value for an attribute + (potentially given in str form) + """ if isinstance(val, string_types): val = getattr(self, val) + # Could use Theano's: + # val = theano.gof.op.get_test_value(val) if isinstance(val, tt.sharedvar.SharedVariable): return val.get_value() elif isinstance(val, tt.TensorVariable): @@ -290,7 +266,7 @@ def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', self.logp = logp -class UnivariateContinuous(Continuous): +class Univariate(Distribution): def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *args, **kwargs): @@ -312,30 +288,15 @@ def __init__(self, dist_params, ndim=None, size=None, dtype=None, dtype = tt.scal.upcast(*(tt.config.floatX,) + tuple(x.dtype for x in dist_params)) # We just assume - super(UnivariateContinuous, self).__init__( + super(Univariate, self).__init__( tuple(), tuple(), size, bcast, *args, **kwargs) -class MultivariateContinuous(Continuous): +class Multivariate(Distribution): pass - -class MultivariateDiscrete(Discrete): - - pass - - -class UnivariateDiscrete(Discrete): - - def __init__(self, ndim, size, bcast, *args, **kwargs): - self.shape_supp = () - - super(UnivariateDiscrete, self).__init__( - 0, ndim, size, bcast, *args, **kwargs) - - def draw_values(params, point=None): """ Draw (fix) parameter values. Handles a number of cases: diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 107cb66ff9..6f6f02de4c 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -14,7 +14,7 @@ from . import transforms from ..model import Deterministic from .continuous import ChiSquared, Normal -from .distribution import MultivariateContinuous, MultivariateDiscrete, draw_values, generate_samples +from .distribution import Multivariate, Continuous, Discrete, draw_values, generate_samples from .special import gammaln, multigammaln from .dist_math import bound, logpow, factln @@ -58,7 +58,7 @@ def get_tau_cov(mu, tau=None, cov=None): return (tau, cov) -class MvNormal(MultivariateContinuous): +class MvNormal(Multivariate, Continuous): r""" Multivariate normal log-likelihood. @@ -143,9 +143,10 @@ def logp(self, value): return -1 / 2. * result -class MvStudentT(MultivariateContinuous): - R""" - Multivariate Student-T log-likelihood. +class MvStudentT(Multivariate, Continuous): + """ + Multivariate Student T log-likelihood. +>>>>>>> started splitting up dist. classes; some small testval test changes .. math:: f(\mathbf{x}| \nu,\mu,\Sigma) = @@ -220,7 +221,7 @@ def logp(self, value): return log_pdf -class Dirichlet(MultivariateContinuous): +class Dirichlet(Multivariate, Continuous): R""" Dirichlet log-likelihood. @@ -299,7 +300,7 @@ def logp(self, value): broadcast_conditions=False) -class Multinomial(MultivariateDiscrete): +class Multinomial(Multivariate, Discrete): R""" Multinomial log-likelihood. @@ -446,7 +447,7 @@ def __str__(self): matrix_pos_def = PosDefMatrix() -class Wishart(MultivariateContinuous): +class Wishart(Multivariate, Continuous): R""" Wishart log-likelihood. @@ -616,7 +617,7 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) -class LKJCorr(MultivariateContinuous): +class LKJCorr(Multivariate, Continuous): R""" The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood. 
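(To make the multivariate shape bookkeeping above concrete: the support shape is read from the trailing axis of the location parameter, any leading parameter axes become independent dimensions, and replications are still marked TODO in these constructors, so the replication shape stays empty. A minimal runnable sketch with hypothetical sizes, not an official API guarantee:)

    # the full variable shape is assembled as reps + ind + supp
    def full_shape(shape_reps, shape_ind, shape_supp):
        return tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)

    # e.g. MvNormal with mu of shape (4, 3): four independent
    # 3-dimensional normals, with no replications applied yet
    assert full_shape((), (4,), (3,)) == (4, 3)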
diff --git a/pymc3/model.py b/pymc3/model.py index ad80cfe140..f06acdf9a2 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -498,22 +498,34 @@ def __init__(self, type=None, owner=None, index=None, name=None, owner : theano owner (optional) name : str distribution : Distribution - model : Model""" + model : Model + """ if type is None: type = distribution.type super(FreeRV, self).__init__(type, owner, index, name) if distribution is not None: self.dshape = distribution.shape - self.dsize = tt.prod(distribution.shape) + + const_dsize = 1 + const_shape = tuple() + for s in distribution.shape: + try: + shape_val = tt.get_scalar_constant_value(s) + const_dsize *= shape_val + const_shape += shape_val + except tt.NotScalarConstantError: + const_dsize = None + const_shape = None + break + + if const_dsize is None: + self.dsize = tt.prod(distribution.shape) + else: + self.dsize = const_dsize + self.distribution = distribution - # TODO: why should we do this? shouldn't the default/test_val - # of the distribution be enough? - # dist_test_val = distribution.default() - # self.tag.test_value = np.ones( - # distribution.shape.tag.test_value, distribution.dtype) * - # dist_test_val - self.tag.test_value = distribution.default() + self.tag.test_value = distribution.testval self.logp_elemwiset = distribution.logp(self) self.model = model From e6e1b4a8895474a8ef00802c150665cd3c83306f Mon Sep 17 00:00:00 2001 From: brandon willard Date: Tue, 21 Jun 2016 11:58:54 -0500 Subject: [PATCH 09/17] forced derived shape components to be tuples --- pymc3/distributions/multivariate.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 6f6f02de4c..dcffee6c16 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -102,8 +102,8 @@ def __init__(self, mu, tau, ndim=None, size=None, dtype=None, *args, self.median = self.mode = self.mean = self.mu # TODO: How do we want to use ndim? - shape_supp = self.mu.shape[-1] - shape_ind = self.mu.shape[:-1] + shape_supp = (self.mu.shape[-1],) + shape_ind = tuple(self.mu.shape[:-1]) if self.mu.ndim > 0: bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) @@ -261,8 +261,8 @@ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, self.dist_params = (self.a,) # TODO: How do we want to use ndim? - shape_supp = self.a.shape[-1] - shape_ind = self.a.shape[:-1] + shape_supp = (self.a.shape[-1],) + shape_ind = tuple(self.a.shape[:-1]) # FIXME: this isn't correct/ideal if self.a.ndim > 0: @@ -358,7 +358,7 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, # TODO: check that n == len(p)? # TODO: How do we want to use ndim? - shape_supp = self.n + shape_supp = (self.n,) shape_ind = () # FIXME: this isn't correct/ideal @@ -500,12 +500,11 @@ def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, self.dist_params = (self.n, self.V) # TODO: How do we want to use ndim? 
- shape_supp = self.V.shape[-1] + shape_supp = (self.V.shape[-1],) shape_ind = () - self.mode = tt.switch(1 * (self.n >= shape_supp + 1), (self.n - - shape_supp - 1) - * self.V, np.nan) + self.mode = tt.switch(1 * (self.n >= shape_supp + 1), + (self.n - shape_supp - 1) * self.V, np.nan) self.mean = self.n * self.V # FIXME: this isn't correct/ideal @@ -662,9 +661,11 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.n = tt.as_tensor_variable(n, ndim=0) self.p = tt.as_tensor_variable(p, ndim=0) + self.dist_params = (self.n, self.p) + # TODO: How do we want to use ndim? n_elem = (self.p * (self.p - 1) / 2) - shape_supp = n_elem + shape_supp = (n_elem,) self.mean = tt.zeros(n_elem) # FIXME: triu, bcast, etc. @@ -672,9 +673,6 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): self.tri_index[tt.triu(self.p, k=1)] = tt.arange(n_elem) self.tri_index[tt.triu(self.p, k=1)[::-1]] = tt.arange(n_elem) - self.dist_params = (self.n, self.p) - - # TODO: do this correctly; what about replications? shape_ind = () # FIXME: this isn't correct/ideal From a8b26ec86b52cbd6fd271ec0917f39b02d5c7bce Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 2 Jan 2017 14:55:36 -0600 Subject: [PATCH 10/17] fixed typos from rebase --- pymc3/distributions/discrete.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 7c877877de..eafb7d56b7 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1,4 +1,5 @@ from functools import partial +import warnings import numpy as np import theano @@ -421,6 +422,10 @@ def random_choice(k, *args, **kwargs): p, k = draw_values([self.p, self.k], point=point) return generate_samples(partial(random_choice, np.arange(k)), + p=p, + broadcast_shape=p.shape[:-1] or (1,), + dist_shape=self.shape, + size=size) def logp(self, value): p = self.p @@ -437,7 +442,7 @@ def logp(self, value): sumto1) -class ConstantDist(Univariate, Discrete): +class Constant(Univariate, Discrete): """ Constant log-likelihood. @@ -450,7 +455,7 @@ class ConstantDist(Univariate, Discrete): def __init__(self, c, ndim=None, size=None, dtype=None, *args, **kwargs): self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) self.dist_params = (self.c,) - super(ConstantDist, self).__init__( + super(Constant, self).__init__( self.dist_params, ndim, size, dtype, *args, **kwargs) def random(self, point=None, size=None, repeat=None): @@ -467,6 +472,7 @@ def logp(self, value): c = self.c return bound(0, tt.eq(value, c)) + def ConstantDist(*args, **kwargs): warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.", DeprecationWarning) @@ -525,7 +531,7 @@ def logp(self, value): tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta))) -class ZeroInflatedNegativeBinomial(UnivariateDiscrete): +class ZeroInflatedNegativeBinomial(Univariate, Discrete): R""" Zero-Inflated Negative binomial log-likelihood. From 72e025e03760451a87e2608408fb79efebbe5e93 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 2 Jan 2017 17:23:53 -0600 Subject: [PATCH 11/17] removed unnecessary shape args in univariates; added check and shape evaluation step --- pymc3/distributions/continuous.py | 173 ++++++++++++---------------- pymc3/distributions/distribution.py | 56 ++++++--- 2 files changed, 113 insertions(+), 116 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 1a02d4b608..3c91fafbba 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -49,14 +49,14 @@ def __init__(self, *args, **kwargs): super(UnitUnivariateContinuous, self).__init__(transform=transform, *args, **kwargs) + def assert_negative_support(var, label, distname, value=-1e-6): # Checks for evidence of positive support for a variable if var is None: return try: # Transformed distribution - support = np.isfinite(var.transformed.distribution.dist - .logp(value).tag.test_value) + support = np.isfinite(var.transformed.distribution.dist.logp(value).tag.test_value) except AttributeError: try: # Untransformed distribution @@ -72,7 +72,7 @@ def assert_negative_support(var, label, distname, value=-1e-6): def get_tau_sd(tau=None, sd=None): - """ + r""" Find precision and standard deviation .. math:: @@ -134,19 +134,17 @@ class Uniform(Univariate, Continuous): Upper limit. """ - def __init__(self, lower=0, upper=1, transform='interval', size=None, - ndim=None, dtype=None, *args, **kwargs): + def __init__(self, lower=0, upper=1, transform='interval', *args, **kwargs): lower = tt.as_tensor_variable(lower) upper = tt.as_tensor_variable(upper) - self.dist_params = (lower, upper) self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. self.median = self.mean - super(Uniform, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + dist_params = (self.lower, self.upper) + super(Uniform, self).__init__(dist_params, *args, **kwargs) if transform == 'interval': self.transform = transforms.interval(lower, upper) @@ -163,7 +161,7 @@ def logp(self, value): lower = self.lower upper = self.upper return bound(-tt.log(upper - lower), - value >= lower, value <= upper) + value >= lower, value <= upper) class Flat(Univariate, Continuous): @@ -172,13 +170,12 @@ class Flat(Univariate, Continuous): the passed value. """ - def __init__(self, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, *args, **kwargs): self.median = tt.as_tensor_variable(0.) - self.dist_params = (self.median,) + dist_params = (self.median,) - super(Flat, self).__init__(self.dist_params, ndim, size, dtype, *args, - **kwargs) + super(Flat, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): raise ValueError('Cannot sample from Flat distribution') @@ -220,30 +217,18 @@ class Normal(Univariate, Continuous): tau : float Precision (tau > 0). """ - def __init__(self, *args, **kwargs): + + def __init__(self, mu=0.0, sd=None, tau=None, *args, **kwargs): + # FIXME In order to catch the case where Normal('x', 0, .1) is # called to display a warning we have to fetch the args and # kwargs manually. After a certain period we should revert # back to the old calling signature. - if len(args) == 1: - mu = args[0] - sd = kwargs.pop('sd', None) - tau = kwargs.pop('tau', None) - elif len(args) == 2: - warnings.warn(('The order of positional arguments to Normal()' - 'has changed. 
The new signature is:' - 'Normal(name, mu, sd) instead of Normal(name, mu, tau).'), - DeprecationWarning) - mu, sd = args - tau = kwargs.pop('tau', None) - else: - mu = kwargs.pop('mu', 0.) - sd = kwargs.pop('sd', None) - tau = kwargs.pop('tau', None) - - def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, - dtype=None, *args, **kwargs): + warnings.warn(('The order of positional arguments to Normal()' + 'has changed. The new signature is:' + 'Normal(name, mu, sd) instead of Normal(name, mu, tau).'), + DeprecationWarning) mu = tt.as_tensor_variable(mu) self.mean = self.median = self.mode = self.mu = mu @@ -253,10 +238,9 @@ def __init__(self, mu=0.0, tau=None, sd=None, ndim=None, size=None, assert_negative_support(sd, 'sd', 'Normal') assert_negative_support(tau, 'tau', 'Normal') - self.dist_params = (self.mu, self.tau) + dist_params = (self.mu, self.tau) - super(Normal, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(Normal, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, tau, sd = draw_values([self.mu, self.tau, self.sd], @@ -296,8 +280,8 @@ class HalfNormal(PositiveUnivariateContinuous): tau : float Precision (tau > 0). """ - def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, - *args, **kwargs): + + def __init__(self, tau=None, sd=None, *args, **kwargs): self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) @@ -306,10 +290,9 @@ def __init__(self, tau=None, sd=None, ndim=None, size=None, dtype=None, assert_negative_support(tau, 'tau', 'HalfNormal') assert_negative_support(sd, 'sd', 'HalfNormal') - self.dist_params = (self.tau,) + dist_params = (self.tau,) - super(HalfNormal, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(HalfNormal, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): sd = draw_values([self.sd], point=point) @@ -381,8 +364,7 @@ class Wald(PositiveUnivariateContinuous): The American Statistician, Vol. 30, No. 2, pp. 88-90 """ - def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, - size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha @@ -395,10 +377,9 @@ def __init__(self, mu=None, lam=None, phi=None, alpha=0., ndim=None, assert_negative_support(mu, 'mu', 'Wald') assert_negative_support(lam, 'lam', 'Wald') - self.dist_params = (self.mu, self.lam, self.alpha) + dist_params = (self.mu, self.lam, self.alpha) - super(Wald, self).__init__(self.dist_params, ndim, size, dtype, *args, - **kwargs) + super(Wald, self).__init__(dist_params, *args, **kwargs) def get_mu_lam_phi(self, mu, lam, phi): res = None @@ -493,8 +474,7 @@ class Beta(UnitUnivariateContinuous): the binomial distribution. 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, - size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -505,10 +485,9 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, assert_negative_support(alpha, 'alpha', 'Beta') assert_negative_support(beta, 'beta', 'Beta') - self.dist_params = (self.alpha, self.beta) + dist_params = (self.alpha, self.beta) - super(Beta, self).__init__(self.dist_params, ndim, size, dtype, *args, - **kwargs) + super(Beta, self).__init__(dist_params, *args, **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -559,17 +538,17 @@ class Exponential(PositiveUnivariateContinuous): Rate or inverse scale (lam > 0) """ - def __init__(self, lam, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, lam, *args, **kwargs): self.lam = lam self.mean = 1. / lam self.median = self.mean * tt.log(2) self.mode = 0 - self.dist_params = (self.lam,) + dist_params = (self.lam,) self.variance = lam**-2 - super(Exponential, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Exponential, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): lam = draw_values([self.lam], point=point) @@ -605,7 +584,7 @@ class Laplace(Univariate, Continuous): Scale parameter (b > 0). """ - def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu, b, *args, **kwargs): self.b = tt.as_tensor_variable(b) self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable( mu) @@ -613,10 +592,9 @@ def __init__(self, mu, b, ndim=None, size=None, dtype=None, *args, **kwargs): assert_negative_support(b, 'b', 'Laplace') - self.dist_params = (self.b, self.mu) + dist_params = (self.b, self.mu) - super(Laplace, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(Laplace, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, b = draw_values([self.mu, self.b], point=point) @@ -659,7 +637,8 @@ class Lognormal(PositiveUnivariateContinuous): tau : float Scale parameter (tau > 0). 
""" - def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, + + def __init__(self, mu=0, tau=1, *args, **kwargs): self.mu = tt.as_tensor_variable(mu) self.tau = tt.as_tensor_variable(tau) @@ -671,10 +650,9 @@ def __init__(self, mu=0, tau=1, ndim=None, size=None, dtype=None, *args, assert_negative_support(tau, 'tau', 'Lognormal') assert_negative_support(sd, 'sd', 'Lognormal') - self.dist_params = (self.mu, self.tau) + dist_params = (self.mu, self.tau) - super(Lognormal, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(Lognormal, self).__init__(dist_params, *args, **kwargs) def _random(self, mu, tau, size=None): samples = np.random.normal(size=size) @@ -736,8 +714,8 @@ def __init__(self, nu, mu=0, lam=None, sd=None, ndim=None, size=None, assert_negative_support(lam, 'lam (sd)', 'StudentT') assert_negative_support(nu, 'nu', 'StudentT') - self.dist_params = (self.nu, self.mu, self.lam) - super(StudentT, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + dist_params = (self.nu, self.mu, self.lam) + super(StudentT, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): nu, mu, lam = draw_values([self.nu, self.mu, self.lam], @@ -784,7 +762,8 @@ class Pareto(PositiveUnivariateContinuous): m : float Scale parameter (m > 0). """ - def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, alpha, m, *args, **kwargs): self.alpha = tt.as_tensor_variable(alpha) self.m = tt.as_tensor_variable(m) self.mean = tt.switch(tt.gt(self.alpha, 1), self.alpha * self.m / (self.alpha - 1.), np.inf) @@ -797,10 +776,9 @@ def __init__(self, alpha, m, ndim=None, size=None, dtype=None, *args, **kwargs): assert_negative_support(alpha, 'alpha', 'Pareto') assert_negative_support(m, 'm', 'Pareto') - self.dist_params = (self.alpha, self.m) + dist_params = (self.alpha, self.m) - super(Pareto, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(Pareto, self).__init__(dist_params, *args, **kwargs) def _random(self, alpha, m, size=None): u = np.random.uniform(size=size) @@ -847,14 +825,13 @@ class Cauchy(Univariate, Continuous): Scale parameter > 0 """ - def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha, beta, *args, **kwargs): self.median = self.mode = self.alpha = tt.as_tensor_variable(alpha) self.beta = tt.as_tensor_variable(beta) - self.dist_params = (self.alpha, self.beta) + dist_params = (self.alpha, self.beta) - super(Cauchy, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(Cauchy, self).__init__(dist_params, *args, **kwargs) assert_negative_support(beta, 'beta', 'Cauchy') @@ -898,15 +875,14 @@ class HalfCauchy(PositiveUnivariateContinuous): Scale parameter (beta > 0). """ - def __init__(self, beta, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, beta, *args, **kwargs): self.mode = tt.as_tensor_variable(0.) self.beta = tt.as_tensor_variable(beta) self.median = beta - self.dist_params = (self.beta,) + dist_params = (self.beta,) - super(HalfCauchy, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(HalfCauchy, self).__init__(dist_params, *args, **kwargs) assert_negative_support(beta, 'beta', 'HalfCauchy') @@ -966,8 +942,7 @@ class Gamma(PositiveUnivariateContinuous): Alternative scale parameter (sd > 0). 
""" - def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, - size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -978,10 +953,9 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, ndim=None, assert_negative_support(alpha, 'alpha', 'Gamma') assert_negative_support(beta, 'beta', 'Gamma') - self.dist_params = (self.alpha, self.beta) + dist_params = (self.alpha, self.beta) - super(Gamma, self).__init__(self.dist_params, ndim, size, dtype, *args, - **kwargs) + super(Gamma, self).__init__(dist_params, *args, **kwargs) def get_alpha_beta(self, alpha=None, beta=None, mu=None, sd=None): if (alpha is not None) and (beta is not None): @@ -1039,7 +1013,7 @@ class InverseGamma(PositiveUnivariateContinuous): beta : float Scale parameter (beta > 0). """ - def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha, beta=1., *args, **kwargs): self.alpha = tt.as_tensor_variable(alpha) self.beta = tt.as_tensor_variable(beta) self.mean = (alpha > 1) * beta / (alpha - 1.) or np.inf @@ -1050,9 +1024,9 @@ def __init__(self, alpha, beta=1., ndim=None, size=None, dtype=None, *args, **kw assert_negative_support(alpha, 'alpha', 'InverseGamma') assert_negative_support(beta, 'beta', 'InverseGamma') - self.dist_params = (self.alpha, self.beta) + dist_params = (self.alpha, self.beta) - super(InverseGamma, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + super(InverseGamma, self).__init__(dist_params, *args, **kwargs) def _calculate_mean(self): m = self.beta / (self.alpha - 1.) @@ -1127,7 +1101,7 @@ class Weibull(PositiveUnivariateContinuous): beta : float Scale parameter (beta > 0). """ - def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha, beta, *args, **kwargs): self.alpha = tt.as_tensor_variable(alpha) self.beta = tt.as_tensor_variable(beta) self.mean = beta * tt.exp(gammaln(1 + 1. / alpha)) @@ -1138,10 +1112,9 @@ def __init__(self, alpha, beta, ndim=None, size=None, dtype=None, *args, **kwarg assert_negative_support(alpha, 'alpha', 'Weibull') assert_negative_support(beta, 'beta', 'Weibull') - self.dist_params = (self.alpha, self.beta) + dist_params = (self.alpha, self.beta) - super(Weibull, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(Weibull, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): alpha, beta = draw_values([self.alpha, self.beta], @@ -1273,11 +1246,13 @@ def dist(cls, *args, **kwargs): def StudentTpos(*args, **kwargs): warnings.warn("StudentTpos has been deprecated. In future, use HalfStudentT instead.", - DeprecationWarning) + DeprecationWarning) return HalfStudentT(*args, **kwargs) + HalfStudentT = Bound(StudentT, lower=0) + class ExGaussian(Univariate, Continuous): R""" Exponentially modified Gaussian log-likelihood. @@ -1323,7 +1298,7 @@ class ExGaussian(Univariate, Continuous): Vol. 4, No. 1, pp 35-45. 
""" - def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, mu, sigma, nu, *args, **kwargs): self.mu = tt.as_tensor_variable(mu) self.sigma = tt.as_tensor_variable(sigma) self.nu = tt.as_tensor_variable(nu) @@ -1333,10 +1308,9 @@ def __init__(self, mu, sigma, nu, ndim=None, size=None, dtype=None, *args, **kwa assert_negative_support(sigma, 'sigma', 'ExGaussian') assert_negative_support(nu, 'nu', 'ExGaussian') - self.dist_params = (self.mu, self.sigma, self.nu) + dist_params = (self.mu, self.sigma, self.nu) - super(ExGaussian, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(ExGaussian, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], @@ -1388,16 +1362,14 @@ class VonMises(Univariate, Continuous): Concentration (\frac{1}{kappa} is analogous to \sigma^2). """ - def __init__(self, mu=0.0, kappa=None, transform='circular', ndim=None, - size=None, dtype=None, *args, **kwargs): + def __init__(self, mu=0.0, kappa=None, transform='circular', *args, **kwargs): self.mean = self.median = self.mode = self.mu = tt.as_tensor_variable(mu) self.kappa = tt.as_tensor_variable(kappa) self.variance = 1. - i1(self.kappa) / i0(self.kappa) - self.dist_params = (self.mu, self.kappa) + dist_params = (self.mu, self.kappa) - super(VonMises, self).__init__(self.dist_params, ndim, size, dtype, - *args, **kwargs) + super(VonMises, self).__init__(dist_params, *args, **kwargs) if transform == 'circular': self.transform = transforms.Circular() @@ -1414,7 +1386,8 @@ def random(self, point=None, size=None, repeat=None): def logp(self, value): mu = self.mu kappa = self.kappa - return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0) + return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), + value >= -np.pi, value <= np.pi, kappa >= 0) class SkewNormal(Continuous): @@ -1456,6 +1429,7 @@ class SkewNormal(Continuous): approaching plus/minus infinite we get a half-normal distribution. """ + def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs): super(SkewNormal, self).__init__(ndim=1, *args, **kwargs) self.mu = mu @@ -1481,8 +1455,7 @@ def logp(self, value): mu = self.mu alpha = self.alpha return bound( - tt.log(1 + - tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + tt.log(1 + tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) + (-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2., tau > 0, sd > 0) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 4e3406714d..afb80e76fe 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,6 +1,9 @@ +import collections + import numpy as np import theano.tensor as tt from theano import function +from theano.gof import Constant from theano.tensor.raw_random import _infer_ndim_bcast from ..memoize import memoize @@ -16,9 +19,11 @@ class _Unpickling(object): pass + def _as_tensor_shape_variable(var): - """ Just a collection of useful shape stuff from - `_infer_ndim_bcast` """ + r""" Just some useful shape object parsing steps. + Copied from `_infer_ndim_bcast`. + """ if var is None: return tt.constant([], dtype='int64') @@ -42,6 +47,24 @@ def _as_tensor_shape_variable(var): return res +def has_const_inputs(nodes): + r"""Checks that nodes have only constant inputs for + their Ops. 
Useful for justifying one-time evals. + """ + if not isinstance(nodes, collections.Iterable): + nodes = [nodes] + + for node in nodes: + owner = getattr(node, 'owner', None) + if owner is not None: + if not has_const_inputs(owner.inputs): + return False + elif not isinstance(node, Constant): + return False + + return True + + class Distribution(object): """Statistical distribution""" def __new__(cls, name, *args, **kwargs): @@ -159,6 +182,12 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, self.shape = tuple(self.shape_reps) +\ tuple(self.shape_ind) +\ tuple(self.shape_supp) + self.shape = tt.as_tensor_variable(self.shape) + + if has_const_inputs(self.shape): + # FIXME: This feels like a hack. Seems like it would be better to + # evaluate somewhere else (e.g. exactly where a value is needed). + self.shape = self.shape.eval() self.ndim = tt.get_vector_length(self.shape) self.defaults = defaults @@ -270,30 +299,25 @@ class Univariate(Distribution): def __init__(self, dist_params, ndim=None, size=None, dtype=None, bcast=None, *args, **kwargs): + r"""This constructor automates some of the shape determination, since + univariate distributions are simple in that regard. """ - Parameters - ========== - - dist_params - list or tuple of parameters to use for shape inference - - """ - - self.shape_supp = () - dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) + self.dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) - ndim, size, bcast = _infer_ndim_bcast(*(ndim, size) + dist_params) + ndim, size, bcast = _infer_ndim_bcast(*((ndim, size) + + self.dist_params)) if dtype is None: - dtype = tt.scal.upcast(*(tt.config.floatX,) + tuple(x.dtype for x in dist_params)) + dtype = tt.scal.upcast(*((tt.config.floatX,) + + tuple(x.dtype for x in self.dist_params))) - # We just assume super(Univariate, self).__init__( tuple(), tuple(), size, bcast, *args, **kwargs) class Multivariate(Distribution): - + r"""TODO: Automate some of the multivariate shape determination? + """ pass From c8a578c79503db4baddbd444a5f3a423fc99825c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 2 Jan 2017 19:24:01 -0600 Subject: [PATCH 12/17] fixed some shape issues in transform and advi logic --- pymc3/distributions/distribution.py | 7 +++++-- pymc3/distributions/transforms.py | 24 +++++++++++++++++------- pymc3/variational/advi.py | 14 +++++++------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index afb80e76fe..8489ed7b3d 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -94,7 +94,7 @@ def dist(cls, *args, **kwargs): return dist def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, - testval=None, defaults=None, transform=None): + testval=None, defaults=None, transform=None, *args, **kwargs): r""" Distributions are specified in terms of the shape of their support, the shape of the space of independent instances and the shape of the space of replications. 
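(A small runnable sketch of the shape handling this docstring describes, using hypothetical sizes: the three components are concatenated as replications + independent + support, and when every component is constant the symbolic shape can be evaluated once up front, which is what the `has_const_inputs` check above justifies.)

    import theano.tensor as tt

    shape_reps, shape_ind, shape_supp = (2,), (4,), (3,)
    shape = tt.as_tensor_variable(shape_reps + shape_ind + shape_supp)
    # every input is a constant, so a one-time eval is justified
    assert tuple(shape.eval()) == (2, 4, 3)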
@@ -175,6 +175,9 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, self.shape_reps = _as_tensor_shape_variable(shape_reps) self.ndim_reps = tt.get_vector_length(self.shape_reps) + self.bcast = bcast + self.dtype = dtype + ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps if ndim_sum == 0: self.shape = tt.constant([], dtype='int64') @@ -197,7 +200,7 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval = self.get_test_value(defaults=self.defaults) self.testval = testval - self.type = tt.TensorType(str(dtype), bcast) + self.type = tt.TensorType(str(dtype), self.bcast) def default(self): return self.get_test_value(self.testval, self.defaults) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 4234d4530b..c02779e07c 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -46,13 +46,14 @@ class TransformedDistribution(Distribution): """A distribution that has been transformed from one space into another.""" def __init__(self, dist, transform, *args, **kwargs): - """ + r""" Parameters ---------- dist : Distribution + TODO transform : Transform - args, kwargs - arguments to Distribution""" + TODO + """ forward = transform.forward testval = forward(dist.default()) @@ -61,9 +62,18 @@ def __init__(self, dist, transform, *args, **kwargs): v = forward(FreeRV(name='v', distribution=dist)) self.type = v.type + # We can get the transformed support shape from a single dummy var in + # only the support (i.e. without the independent or replication dimensions). + shape_supp = forward(tt.alloc(1, *dist.shape_supp)).shape + + # XXX: We assume these two shapes don't change under a transform. + shape_ind = dist.shape_ind + shape_reps = dist.shape_reps + super(TransformedDistribution, self).__init__( - v.shape.tag.test_value, v.dtype, - testval, dist.defaults, *args, **kwargs) + shape_supp, shape_ind, shape_reps, + v.broadcastable, v.dtype, + testval.tag.test_value, dist.defaults, *args, **kwargs) if transform.name == 'stickbreaking': b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) @@ -193,7 +203,7 @@ class StickBreaking(Transform): Parameters ---------- eps : float, positive value - A small value for numerical stability in invlogit. + A small value for numerical stability in invlogit. 
""" name = "stickbreaking" @@ -250,7 +260,7 @@ def backward(self, y): def forward(self, x): return x - + def jacobian_det(self, x): return 0 diff --git a/pymc3/variational/advi.py b/pymc3/variational/advi.py index 67889529b0..879df2d72e 100644 --- a/pymc3/variational/advi.py +++ b/pymc3/variational/advi.py @@ -208,11 +208,11 @@ def logp_(input): r = MRG_RandomStreams(seed=random_seed) if n_mcsamples == 1: - n = r.normal(size=inarray.tag.test_value.shape) + n = r.normal(size=np.shape(inarray.tag.test_value)) q = n * tt.exp(w) + u elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi)) else: - n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0])) + n = r.normal(size=(n_mcsamples, np.shape(u.tag.test_value)[0])) qs = n * tt.exp(w) + u logps, _ = theano.scan(fn=lambda q: logp_(q), outputs_info=None, @@ -255,7 +255,7 @@ def optimizer(loss, param): i_int = i.astype('int64') value = param_.get_value(borrow=True) accu = theano.shared( - np.zeros(value.shape + (n_win,), dtype=value.dtype)) + np.zeros(np.shape(value) + (n_win,), dtype=value.dtype)) grad = tt.grad(loss, param_) # Append squared gradient vector to accu_new @@ -324,17 +324,17 @@ def rvs(x): for v in global_RVs: u = theano.shared(vparams['means'][str(v)]).ravel() w = theano.shared(vparams['stds'][str(v)]).ravel() - n = r.normal(size=u.tag.test_value.shape) - updates.update({v: (n * w + u).reshape(v.tag.test_value.shape)}) + n = r.normal(size=np.shape(u.tag.test_value)) + updates.update({v: (n * w + u).reshape(np.shape(v.tag.test_value))}) if local_RVs is not None: for v_, (uw, _) in local_RVs.items(): v = get_transformed(v_) u = uw[0].ravel() w = uw[1].ravel() - n = r.normal(size=u.tag.test_value.shape) + n = r.normal(size=np.shape(u.tag.test_value)) updates.update( - {v: (n * tt.exp(w) + u).reshape(v.tag.test_value.shape)}) + {v: (n * tt.exp(w) + u).reshape(np.shape(v.tag.test_value))}) # Replace some nodes of the graph with variational distributions vars = model.free_RVs From 30133e1a09f41859f689587f27ec0b633fb8bc64 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 2 Jan 2017 22:18:55 -0600 Subject: [PATCH 13/17] started shape handling updates for missing data --- pymc3/distributions/distribution.py | 4 ++-- pymc3/model.py | 37 +++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 8489ed7b3d..8938bc3cf4 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -16,6 +16,7 @@ 'MultivariateContinuous', 'UnivariateContinuous', 'MultivariateDiscrete', 'UnivariateDiscrete'] + class _Unpickling(object): pass @@ -250,8 +251,7 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None, defaults=[], transform=None, parent_dist=None, *args, **kwargs): super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, - bcast, dtype=dtype, - testval=testval, + bcast, dtype, testval=testval, defaults=defaults, *args, **kwargs) self.parent_dist = parent_dist diff --git a/pymc3/model.py b/pymc3/model.py index f06acdf9a2..12d657cef2 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -557,17 +557,44 @@ def as_tensor(data, name, model, distribution): dtype = distribution.dtype data = pandas_to_array(data).astype(dtype) + missing_in_dims = 0 if hasattr(data, 'mask'): + missing_in_dims = np.ma.count_masked(data, axis=0) + + if not np.all(missing_in_dims == 0): from .distributions import NoDistribution - testval = distribution.testval or data.mean().astype(dtype) - fakedist = NoDistribution.dist(shape=data.mask.sum(), dtype=dtype, - testval=testval, parent_dist=distribution) + + # XXX: There was an `or` between these two values; was this a + # condition on None values (as interpreted below), or some kind of + # bitwise operation on the values? + testval = distribution.testval + if testval is None: + testval = data.mean() + testval = testval.astype(dtype) + + # FIXME: What are the correct values for these? Does it + # really matter? + shape_supp = tuple() + shape_ind = tuple() + missing_count = np.sum(missing_in_dims) + shape_reps = tt.as_tensor_variable(np.atleast_1d(missing_count)) + + bcast = (False,) + + # XXX: If these missing value distributions are being used + # directly, then the result will be incorrect when + # the parent distribution is multivariate and the missing values were + # found about the support and/or independent dimensions. + fakedist = NoDistribution.dist(shape_supp, shape_ind, shape_reps, + bcast, dtype, + testval=testval[data.mask.nonzero()], + parent_dist=distribution) missing_values = FreeRV(name=name + '_missing', distribution=fakedist, model=model) constant = tt.as_tensor_variable(data.filled()) - dataTensor = tt.set_subtensor( - constant[data.mask.nonzero()], missing_values) + dataTensor = tt.set_subtensor(constant[data.mask.nonzero()], + missing_values) dataTensor.missing_values = missing_values return dataTensor else: From 842277c7482051fa7fe23101910ee35c4ebbee52 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Tue, 3 Jan 2017 15:54:38 -0600 Subject: [PATCH 14/17] refactored discrete dist. 
constructors; added support for (depricated) shape argument --- pymc3/distributions/discrete.py | 139 ++++++++++++++++------------ pymc3/distributions/distribution.py | 43 ++++++++- 2 files changed, 120 insertions(+), 62 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index eafb7d56b7..b78a7995f7 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -7,9 +7,14 @@ from scipy import stats from .dist_math import bound, factln, binomln, betaln, logpow -from .distribution import Univariate, Discrete, draw_values, generate_samples - -__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', +from .distribution import ( + Univariate, + Discrete, + draw_values, + generate_samples, + has_const_inputs) + +__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', 'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial', 'DiscreteUniform', 'Geometric', 'Categorical'] @@ -38,15 +43,16 @@ class Binomial(Univariate, Discrete): p : float Probability of success in each trial (0 < p < 1). """ - def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, n, p, *args, **kwargs): self.n = tt.as_tensor_variable(n) self.p = tt.as_tensor_variable(p) - self.mode = tt.cast(tt.round(n * p), self.dtype) - self.dist_params = (self.n, self.p) + self.mode = tt.cast(tt.round(n * p), 'int32') - super(Binomial, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + dist_params = (self.n, self.p) + + super(Binomial, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): n, p = draw_values([self.n, self.p], point=point) @@ -93,16 +99,17 @@ class BetaBinomial(Univariate, Discrete): beta > 0. """ - def __init__(self, alpha, beta, n, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, alpha, beta, n, *args, **kwargs): self.alpha = tt.as_tensor_variable(alpha) self.beta = tt.as_tensor_variable(beta) self.n = tt.as_tensor_variable(n) - self.mode = tt.cast(tt.round(alpha / (alpha + beta)), 'int8') - self.dist_params = (self.alpha, self.beta, self.n) + self.mode = tt.cast(tt.round(self.alpha / (self.alpha + self.beta)), + 'int8') + + dist_params = (self.alpha, self.beta, self.n) - super(BetaBinomial, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + super(BetaBinomial, self).__init__(dist_params, *args, **kwargs) def _random(self, alpha, beta, n, size=None): size = size or 1 @@ -152,14 +159,15 @@ class Bernoulli(Univariate, Discrete): p : float Probability of success (0 < p < 1). """ - def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, p, *args, **kwargs): self.p = tt.as_tensor_variable(p) + + dist_params = (self.p,) + self.mode = tt.cast(tt.round(p), 'int8') - self.dist_params = (self.p,) - # FIXME: bcast, etc. - super(Bernoulli, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Bernoulli, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -201,14 +209,15 @@ class Poisson(Univariate, Discrete): The Poisson distribution can be derived as a limiting case of the binomial distribution. 
""" - def __init__(self, mu, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, mu, *args, **kwargs): self.mu = tt.as_tensor_variable(mu) + self.mode = tt.floor(mu).astype('int32') - self.dist_params = (self.mu,) + dist_params = (self.mu,) - super(Poisson, self).__init__(self.dist_params, - ndim, size, dtype, *args, **kwargs) + super(Poisson, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu = draw_values([self.mu], point=point) @@ -251,15 +260,17 @@ class NegativeBinomial(Univariate, Discrete): alpha : float Gamma distribution parameter (alpha > 0). """ - def __init__(self, mu, alpha, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, mu, alpha, *args, **kwargs): self.mu = tt.as_tensor_variable(mu) self.alpha = tt.as_tensor_variable(alpha) + self.mode = tt.floor(mu).astype('int32') - self.dist_params = (self.mu, self.alpha) + dist_params = (self.mu, self.alpha) - super(NegativeBinomial, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + super(NegativeBinomial, self).__init__(dist_params, + *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, alpha = draw_values([self.mu, self.alpha], point=point) @@ -303,13 +314,16 @@ class Geometric(Univariate, Discrete): p : float Probability of success on an individual trial (0 < p <= 1). """ - def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, p, *args, **kwargs): self.p = tt.as_tensor_variable(p) + self.mode = tt.as_tensor_variable(1) - self.dist_params = (self.p,) + + dist_params = (self.p,) super(Geometric, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): p = draw_values([self.p], point=point) @@ -343,15 +357,16 @@ class DiscreteUniform(Univariate, Discrete): Upper limit (upper > lower). """ - def __init__(self, lower, upper, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, lower, upper, *args, **kwargs): self.lower = tt.floor(lower).astype('int32') self.upper = tt.floor(upper).astype('int32') + self.mode = tt.maximum( tt.floor((upper - lower) / 2.).astype('int32'), self.lower) - self.dist_params = (self.lower, self.upper) - super(DiscreteUniform, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + dist_params = (self.lower, self.upper) + + super(DiscreteUniform, self).__init__(dist_params, *args, **kwargs) def _random(self, lower, upper, size=None): # This way seems to be the only to deal with lower and upper @@ -392,23 +407,24 @@ class Categorical(Univariate, Discrete): p > 0 and the elements of p must sum to 1. They will be automatically rescaled otherwise. """ - def __init__(self, p, ndim=None, size=None, dtype=None, *args, **kwargs): - try: - self.k = tt.shape(p)[-1].tag.test_value - except AttributeError: - self.k = tt.shape(p)[-1] + def __init__(self, p, *args, **kwargs): + + self.k = tt.shape(p)[-1] + + if has_const_inputs(self.k): + # FIXME: This feels like a hack. Seems like it would be better to + # evaluate somewhere else (e.g. exactly where a value is needed). + self.k = self.k.eval() + self.p = tt.as_tensor_variable(p) self.p = (p.T / tt.sum(p, -1)).T - self.mode = tt.argmax(p) - self.shape_support = tt.shape(self.p)[-1] + self.mode = tt.argmax(p) - self.dist_params = (self.p,) - # FIXME: The stated support is univariate? 
+ dist_params = (self.p,) - super(Categorical, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + super(Categorical, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): def random_choice(k, *args, **kwargs): @@ -452,11 +468,11 @@ class Constant(Univariate, Discrete): Constant parameter. """ - def __init__(self, c, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, c, *args, **kwargs): self.mean = self.median = self.mode = self.c = tt.as_tensor_variable(c) - self.dist_params = (self.c,) - super(Constant, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + dist_params = (self.c,) + + super(Constant, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): c = draw_values([self.c], point=point) @@ -475,9 +491,10 @@ def logp(self, value): def ConstantDist(*args, **kwargs): warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.", - DeprecationWarning) + DeprecationWarning) return Constant(*args, **kwargs) + class ZeroInflatedPoisson(Univariate, Discrete): R""" Zero-inflated Poisson log-likelihood. @@ -507,16 +524,16 @@ class ZeroInflatedPoisson(Univariate, Discrete): Expected proportion of Poisson variates (0 < psi < 1) """ - def __init__(self, theta, psi, ndim=None, size=None, dtype=None, *args, **kwargs): + + def __init__(self, theta, psi, *args, **kwargs): self.theta = tt.as_tensor_variable(theta) self.psi = tt.as_tensor_variable(psi) self.pois = Poisson.dist(theta) self.mode = self.pois.mode - self.dist_params = (self.theta, self.psi) + dist_params = (self.theta, self.psi) - super(ZeroInflatedPoisson, self).__init__( - self.dist_params, ndim, size, dtype, *args, **kwargs) + super(ZeroInflatedPoisson, self).__init__(dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): theta, psi = draw_values([self.theta, self.psi], point=point) @@ -562,14 +579,18 @@ class ZeroInflatedNegativeBinomial(Univariate, Discrete): Expected proportion of NegativeBinomial variates (0 < psi < 1) """ - def __init__(self, mu, alpha, psi, ndim=None, size=None, dtype=None, *args, **kwargs): - self.mu = mu - self.alpha = alpha - self.psi = psi + def __init__(self, mu, alpha, psi, *args, **kwargs): + self.mu = tt.as_tensor_variable(mu) + self.alpha = tt.as_tensor_variable(alpha) + self.psi = tt.as_tensor_variable(psi) + self.nb = NegativeBinomial.dist(mu, alpha) self.mode = self.nb.mode - self.dist_params = (mu, alpha, psi) - super(ZeroInflatedNegativeBinomial, self).__init__(self.dist_params, ndim, size, dtype, *args, **kwargs) + + dist_params = (self.mu, self.alpha, self.psi) + + super(ZeroInflatedNegativeBinomial, self).__init__( + dist_params, *args, **kwargs) def random(self, point=None, size=None, repeat=None): mu, alpha, psi = draw_values( diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 8938bc3cf4..ff49faaa6a 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -1,4 +1,5 @@ import collections +import warnings import numpy as np import theano.tensor as tt @@ -207,14 +208,22 @@ def default(self): return self.get_test_value(self.testval, self.defaults) def get_test_value(self, val=None, defaults=None): + test_val = None if val is None: for v in defaults: the_attr = getattr(self, v, None) if the_attr is not None and np.all(np.isfinite(self.getattr_value(v))): - return self.getattr_value(v) + test_val = self.getattr_value(v) + 
break else: - return self.getattr_value(val) + test_val = self.getattr_value(val) + + if test_val is not None: + if self.ndim_reps > 0 and hasattr(self.shape_reps, 'value'): + test_val = np.tile(test_val, self.shape_reps.value) + + return test_val raise AttributeError(str(self) + " has no finite default value to use, checked: " + str(defaults) + " pass testval argument or adjust so value is finite.") @@ -300,14 +309,42 @@ def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', class Univariate(Distribution): - def __init__(self, dist_params, ndim=None, size=None, dtype=None, + def __init__(self, dist_params, ndim=None, size=None, + shape=None, dtype=None, bcast=None, *args, **kwargs): r"""This constructor automates some of the shape determination, since univariate distributions are simple in that regard. + + Parameters + ---------- + dist_params: tuple + A tuple containing the distributions parameters. + ndim: int + A hint for the number of dimensions. + (Not currently used, but could be useful in cases for which + the shape dimensions aren't easily assessed.) + size: tuple (int) + Shape of replications. + shape: tuple (int) + Deprecated; use ``size``. + dtype: string + Name of primitive numeric type. + bcast: tuple (bool) + Hint for broadcast dimensions. """ + if shape is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + if size is None: + size = shape + self.dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) + if size is not None: + size = np.atleast_1d(size) + ndim, size, bcast = _infer_ndim_bcast(*((ndim, size) + self.dist_params)) if dtype is None: From 19497d4bfb67c00dcb19d221bbf0cc7b7f518f6c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Tue, 3 Jan 2017 19:30:05 -0600 Subject: [PATCH 15/17] fixes to univariate shape determination; typo fixes in multivatiate (still not ready) --- pymc3/distributions/distribution.py | 24 ++++++-- pymc3/distributions/multivariate.py | 96 ++++++++++++++++------------- 2 files changed, 70 insertions(+), 50 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index ff49faaa6a..62b4ef39af 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -24,7 +24,7 @@ class _Unpickling(object): def _as_tensor_shape_variable(var): r""" Just some useful shape object parsing steps. - Copied from `_infer_ndim_bcast`. + Mostly copied from `_infer_ndim_bcast`. """ if var is None: @@ -342,17 +342,29 @@ def __init__(self, dist_params, ndim=None, size=None, self.dist_params = tuple(tt.as_tensor_variable(x) for x in dist_params) - if size is not None: - size = np.atleast_1d(size) + # Parameters need to match in shape; we use the following to + # determine what the [independent terms'] ultimate shape is. + ndim, ind_size, bcast = _infer_ndim_bcast(*((ndim, None) + + self.dist_params)) + + if size is None: + size = () + + # Add broadcast info from replication dimensions. + bcast += tuple(True if s_ == 1 else False for s_ in size) + + # We have to be careful with `as_tensor_variable`; it will produce + # empty collections with dtype=floatX, which violates our expectations + # for a shape object. 
+ size = tt.as_tensor_variable(np.asarray(size, dtype=np.int), + ndim=1) - ndim, size, bcast = _infer_ndim_bcast(*((ndim, size) + - self.dist_params)) if dtype is None: dtype = tt.scal.upcast(*((tt.config.floatX,) + tuple(x.dtype for x in self.dist_params))) super(Univariate, self).__init__( - tuple(), tuple(), size, bcast, *args, **kwargs) + tuple(), ind_size, size, bcast, *args, **kwargs) class Multivariate(Distribution): diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index dcffee6c16..96961d4b14 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -14,7 +14,12 @@ from . import transforms from ..model import Deterministic from .continuous import ChiSquared, Normal -from .distribution import Multivariate, Continuous, Discrete, draw_values, generate_samples +from .distribution import ( + Multivariate, + Continuous, + Discrete, + draw_values, + generate_samples) from .special import gammaln, multigammaln from .dist_math import bound, logpow, factln @@ -58,6 +63,7 @@ def get_tau_cov(mu, tau=None, cov=None): return (tau, cov) + class MvNormal(Multivariate, Continuous): r""" Multivariate normal log-likelihood. @@ -146,7 +152,6 @@ def logp(self, value): class MvStudentT(Multivariate, Continuous): """ Multivariate Student T log-likelihood. ->>>>>>> started splitting up dist. classes; some small testval test changes .. math:: f(\mathbf{x}| \nu,\mu,\Sigma) = @@ -250,8 +255,10 @@ class Dirichlet(Multivariate, Continuous): Only the first `k-1` elements of `x` are expected. Can be used as a parent of Multinomial and Categorical nevertheless. """ - def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, dtype=None, *args, **kwargs): - # TODO: Need to apply replications/size. + + def __init__(self, a, dtype=tt.config.floatX, + transform=transforms.stick_breaking, + *args, **kwargs): self.a = tt.as_tensor_variable(a) self.mean = self.a / tt.sum(self.a) self.mode = tt.switch(tt.all(self.a > 1), @@ -260,22 +267,25 @@ def __init__(self, a, transform=transforms.stick_breaking, ndim=None, size=None, self.dist_params = (self.a,) - # TODO: How do we want to use ndim? shape_supp = (self.a.shape[-1],) shape_ind = tuple(self.a.shape[:-1]) + shape_reps = kwargs.pop('shape', None) + if shape_reps is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_reps = kwargs.pop('size', ()) + # FIXME: this isn't correct/ideal if self.a.ndim > 0: bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) else: bcast = (False,) - if dtype is None: - dtype = self.mu.dtype - - super(Dirichlet, self).__init__(shape_supp, shape_ind, (), bcast, - dtype, transform=transform, *args, - **kwargs) + super(Dirichlet, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, transform=transform, + *args, **kwargs) def random(self, point=None, size=None): a = draw_values([self.a], point=point) @@ -331,44 +341,43 @@ class Multinomial(Multivariate, Discrete): be non-negative and sum to 1 along the last axis. They will be automatically rescaled otherwise. """ - def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, - **kwargs): - # TODO: Need to apply replications/size. 
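For orientation, a standalone NumPy sketch of the support/independent/replication split that these multivariate constructors compute (the parameter values and the `size` choice are illustrative only)::

    import numpy as np

    # Hypothetical Dirichlet-style concentration parameter: two independent
    # rows, each defining a 3-dimensional support.
    a = np.array([[1., 2., 3.],
                  [4., 5., 6.]])

    shape_supp = a.shape[-1:]   # support dimensions, e.g. (3,)
    shape_ind = a.shape[:-1]    # independent-terms dimensions, e.g. (2,)
    shape_reps = (5,)           # replications requested via `size`

    # The full shape of the random variable stacks replications first,
    # then independent terms, then the support.
    full_shape = tuple(shape_reps) + tuple(shape_ind) + tuple(shape_supp)
    assert full_shape == (5, 2, 3)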
+ + def __init__(self, n, p, ndim=None, size=None, dtype='int32', + *args, **kwargs): self.n = tt.as_tensor_variable(n, ndim=0) self.p = tt.as_tensor_variable(p) self.p = self.p / tt.sum(self.p, axis=-1, keepdims=True) - if len(self.shape) == 2: - try: - assert n.shape == (self.shape[0],) - except AttributeError: - # this occurs when n is a scalar Python int or float - n *= tt.ones(self.shape[0]) - - self.n = tt.shape_padright(n) - self.p = p if p.ndim == 2 else tt.shape_padleft(p) - else: - self.n = n - self.p = p - self.mean = self.n * self.p - self.mode = tt.cast(tt.round(self.mean), 'int32') + self.mode = tt.cast(tt.round(self.mean), dtype) self.dist_params = (self.n, self.p) - # TODO: check that n == len(p)? - # TODO: How do we want to use ndim? - shape_supp = (self.n,) + # XXX, FIXME: Is this really multivariate? + shape_supp = (1,) shape_ind = () + # shape_supp = (self.p.shape[-1],) + # shape_ind = tuple(self.p.shape[:-1]) - # FIXME: this isn't correct/ideal + shape_reps = kwargs.pop('shape', None) + if shape_reps is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_reps = kwargs.pop('size', ()) + + # XXX, FIXME: Is this really multivariate? bcast = (False,) + # if self.p.ndim > 0: + # bcast = (False,) * (1 + tt.get_vector_length(shape_ind)) + # else: + # bcast = (False,) + if dtype is None: dtype = self.p.dtype - # FIXME: n.shape == p.shape? bcast, etc. - super(Multinomial, self).__init__(shape_supp, shape_ind, (), bcast, - dtype, *args, **kwargs) + super(Multinomial, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, *args, **kwargs) def _random(self, n, p, size=None): if size == p.shape: @@ -397,7 +406,6 @@ def logp(self, x): ) - def posdef(AA): try: np.linalg.cholesky(AA) @@ -484,8 +492,8 @@ class Wishart(Multivariate, Continuous): use WishartBartlett or LKJCorr. """ - def __init__(self, n, V, ndim=None, size=None, dtype=None, *args, - **kwargs): + def __init__(self, n, V, ndim=None, size=None, dtype=None, + *args, **kwargs): warnings.warn('The Wishart distribution can currently not be used ' 'for MCMC sampling. The probability of sampling a ' 'symmetric matrix is basically zero. Instead, please ' @@ -534,8 +542,7 @@ def logp(self, X): matrix_pos_def(X), tt.eq(X, X.T), n > (p - 1), - broadcast_conditions=False - ) + broadcast_conditions=False) def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None): @@ -656,7 +663,8 @@ class LKJCorr(Multivariate, Continuous): 100(9), pp.1989-2001. """ - def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): + def __init__(self, n, p, ndim=None, size=None, dtype=tt.config.floatX, + *args, **kwargs): # TODO: Need to apply replications/size. self.n = tt.as_tensor_variable(n, ndim=0) self.p = tt.as_tensor_variable(p, ndim=0) @@ -680,7 +688,8 @@ def __init__(self, n, p, ndim=None, size=None, dtype=None, *args, **kwargs): if dtype is None: dtype = self.n.dtype - super(LKJCorr, self).__init__(shape_supp, shape_ind, (), bcast, dtype, + super(LKJCorr, self).__init__(shape_supp, shape_ind, (), + bcast, dtype, *args, **kwargs) def _normalizing_constant(self, n, p): @@ -714,5 +723,4 @@ def logp(self, x): tt.all(X <= 1), tt.all(X >= -1), matrix_pos_def(X), n > 0, - broadcast_conditions=False - ) + broadcast_conditions=False) From 56d9a1e50c6e619e7f6cb8496dc55897531279d0 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Tue, 3 Jan 2017 20:13:14 -0600 Subject: [PATCH 16/17] fixed test_value replications issue --- pymc3/distributions/distribution.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 62b4ef39af..8db462d4d3 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -182,7 +182,7 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps if ndim_sum == 0: - self.shape = tt.constant([], dtype='int64') + self.shape = tt.constant([], dtype='int32') else: self.shape = tuple(self.shape_reps) +\ tuple(self.shape_ind) +\ @@ -221,7 +221,9 @@ def get_test_value(self, val=None, defaults=None): if test_val is not None: if self.ndim_reps > 0 and hasattr(self.shape_reps, 'value'): - test_val = np.tile(test_val, self.shape_reps.value) + bcast_shape = np.append(self.shape_reps.value, + np.shape(test_val)) + test_val = np.broadcast_to(test_val, bcast_shape) return test_val @@ -311,7 +313,7 @@ class Univariate(Distribution): def __init__(self, dist_params, ndim=None, size=None, shape=None, dtype=None, - bcast=None, *args, **kwargs): + *args, **kwargs): r"""This constructor automates some of the shape determination, since univariate distributions are simple in that regard. @@ -329,8 +331,6 @@ def __init__(self, dist_params, ndim=None, size=None, Deprecated; use ``size``. dtype: string Name of primitive numeric type. - bcast: tuple (bool) - Hint for broadcast dimensions. """ if shape is not None: From e702da117989732cc9b0b0fcc0d7897cc3f6072f Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Tue, 3 Jan 2017 22:00:31 -0600 Subject: [PATCH 17/17] fixed another broadcasting issue; started updating tests --- pymc3/distributions/continuous.py | 19 +-- pymc3/distributions/distribution.py | 66 +++++++--- pymc3/tests/test_distribution_defaults.py | 8 +- pymc3/tests/test_distribution_shapes.py | 146 ++++++++++++++++++++++ pymc3/tests/test_distributions.py | 6 +- 5 files changed, 209 insertions(+), 36 deletions(-) create mode 100644 pymc3/tests/test_distribution_shapes.py diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 3c91fafbba..fdad4344ff 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -9,7 +9,6 @@ import numpy as np import theano.tensor as tt -from theano.gof.op import get_test_value from scipy import stats import warnings @@ -27,26 +26,20 @@ class PositiveUnivariateContinuous(Univariate, Continuous): """Base class for positive univariate continuous distributions""" - def __init__(self, *args, **kwargs): + def __init__(self, dist_params, *args, **kwargs): transform = kwargs.get('transform', transforms.log) - super(PositiveUnivariateContinuous, self).__init__(transform=transform, + super(PositiveUnivariateContinuous, self).__init__(dist_params, + transform=transform, *args, **kwargs) - # TODO: is there a better way to use Theano's - # existing `...tag.test_value` mechanics? 
- ndim_sum = self.ndim_supp + self.ndim_ind + self.ndim_reps - if self.testval is None: - if ndim_sum == 0: - self.testval = 0.5 - else: - self.testval = get_test_value(tt.alloc(*((0.5,) + self.shape))) class UnitUnivariateContinuous(Univariate, Continuous): """Base class for univariate continuous distributions in [0,1]""" - def __init__(self, *args, **kwargs): + def __init__(self, dist_params, *args, **kwargs): transform = kwargs.get('transform', transforms.logodds) - super(UnitUnivariateContinuous, self).__init__(transform=transform, + super(UnitUnivariateContinuous, self).__init__(dist_params, + transform=transform, *args, **kwargs) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 8db462d4d3..60e187c077 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -14,8 +14,7 @@ __all__ = ['DensityDist', 'Distribution', 'Continuous', 'Discrete', 'NoDistribution', 'TensorType', - 'MultivariateContinuous', 'UnivariateContinuous', - 'MultivariateDiscrete', 'UnivariateDiscrete'] + 'Multivariate'] class _Unpickling(object): @@ -221,8 +220,7 @@ def get_test_value(self, val=None, defaults=None): if test_val is not None: if self.ndim_reps > 0 and hasattr(self.shape_reps, 'value'): - bcast_shape = np.append(self.shape_reps.value, - np.shape(test_val)) + bcast_shape = self.getattr_value(self.shape) test_val = np.broadcast_to(test_val, bcast_shape) return test_val @@ -261,6 +259,7 @@ class NoDistribution(Distribution): def __init__(self, shape_supp, shape_ind, shape_reps, bcast, dtype, testval=None, defaults=[], transform=None, parent_dist=None, *args, **kwargs): + super(NoDistribution, self).__init__(shape_supp, shape_ind, shape_reps, bcast, dtype, testval=testval, defaults=defaults, *args, @@ -302,12 +301,34 @@ def __init__(self, shape_supp, shape_ind, shape_reps, bcast, class DensityDist(Distribution): """Distribution based on a given log density function.""" - def __init__(self, logp, ndim_support, ndim, size, bcast, dtype='float64', - *args, **kwargs): - super(DensityDist, self).__init__(ndim, size, bcast, dtype, *args, - **kwargs) + def __init__(self, logp, shape_supp=None, shape_ind=None, shape_reps=None, + bcast=None, dtype='float64', *args, **kwargs): self.logp = logp + # TODO: Could add some generic handling like this in + # `Distribution.__init__`, just to handle deprecated use of `shape`. + if (shape_supp is None) or (shape_ind is None) or (shape_reps is None): + + # If we only got the old `shape` parameter, assume it's only + # specifying replications. + old_shape = kwargs.get('shape', None) + if old_shape is not None: + warnings.warn(('The `shape` parameter is deprecated; use `size` to' + ' specify the shape and number of replications.'), + DeprecationWarning) + shape_supp = tuple() + shape_ind = tuple() + shape_reps = old_shape + + bcast += tuple(True if s_ == 1 else False + for s_ in old_shape) + else: + raise ValueError("shapes and bcast must be specified.") + + super(DensityDist, self).__init__(shape_supp, shape_ind, shape_reps, + bcast, dtype, *args, + **kwargs) + class Univariate(Distribution): @@ -320,7 +341,14 @@ def __init__(self, dist_params, ndim=None, size=None, Parameters ---------- dist_params: tuple - A tuple containing the distribution's parameters. + A tuple containing the distribution's parameters. These parameters + are checked for shape compatibility ("broadcastability"), so make + sure their dimensions line up. + For example, if the distribution + for a scalar random variable has parameters `(a, b)`, where `a` is + a scalar and `b` is a vector, and we get an array of `a` values, then + `b` should be given an extra dimension to broadcast along the + independent variates implied by `a`. This constructor will try + to figure that out, but no promises. ndim: int A hint for the number of dimensions. (Not currently used, but could be useful in cases for which the shape dimensions aren't easily assessed.) size: tuple (int) Shape of replications. shape: tuple (int) Deprecated; use ``size``. dtype: string Name of primitive numeric type. - bcast: tuple (bool) - Hint for broadcast dimensions. """ if shape is not None: @@ -347,17 +375,21 @@ def __init__(self, dist_params, ndim=None, size=None, ndim, ind_size, bcast = _infer_ndim_bcast(*((ndim, None) + self.dist_params)) - # We have to be careful with `as_tensor_variable`; it will produce # empty collections with dtype=floatX, which violates our expectations # for a shape object. - size = tt.as_tensor_variable(np.asarray(size, dtype=np.int), - ndim=1) + if size is None or np.alen(size) == 0: + size = np.array((), dtype=np.int) + elif np.shape(size) == (): + size = np.asarray((size,), dtype=np.int) + else: + size = np.asarray(size, dtype=np.int) + + # Add broadcast info from replication dimensions. + bcast += tuple(True if s_ == 1 else False + for s_ in size) + + size = tt.as_tensor_variable(size, ndim=1) if dtype is None: dtype = tt.scal.upcast(*((tt.config.floatX,) + tuple(x.dtype for x in self.dist_params))) super(Univariate, self).__init__( - tuple(), tuple(), size, bcast, *args, **kwargs) + tuple(), ind_size, size, bcast, *args, **kwargs) class Multivariate(Distribution): diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py index e70b480422..549c402a64 100644 --- a/pymc3/tests/test_distribution_defaults.py +++ b/pymc3/tests/test_distribution_defaults.py @@ -1,19 +1,21 @@ from __future__ import division from ..model import Model -from ..distributions import DiscreteUniform, Continuous +from ..distributions import DiscreteUniform, Univariate, Continuous import numpy as np from nose.tools import raises -class DistTest(Continuous): +class DistTest(Univariate, Continuous): def __init__(self, a, b, *args, **kwargs): - super(DistTest, self).__init__(*args, **kwargs) self.a = a self.b = b + dist_params = (self.a, self.b) + super(DistTest, self).__init__(dist_params, *args, **kwargs) + def logp(self, v): return 0 diff --git a/pymc3/tests/test_distribution_shapes.py b/pymc3/tests/test_distribution_shapes.py new file mode 100644 index 0000000000..d7637952be --- /dev/null +++ b/pymc3/tests/test_distribution_shapes.py @@ -0,0 +1,146 @@ +from __future__ import division + +from ..model import Model +from ..distributions import Normal + +import numpy as np +from nose.tools import raises + + +def test_univariate_shape_info(): + with Model() as model: + # Test scalar case with replications. + x = Normal('x', 0, 1, size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), ()) + assert x.distribution.ndim_ind == 0 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test scalar case with empty size input.
+ x = Normal('x', 0, 1, size=()) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, ()) + np.testing.assert_array_equal(np.shape(x.tag.test_value), ()) + # FIXME: `Distribution.random` adds an unnecessary dimension. + # np.testing.assert_array_equal(np.shape(x.random()), ()) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, ()) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), ()) + assert x.distribution.ndim_ind == 0 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), ()) + assert x.distribution.ndim_reps == 0 + + with Model() as model: + # Test independent terms alone. + x = Normal('x', mu=np.arange(0, 2), tau=np.arange(1, 3)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (2,)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (2,)) + np.testing.assert_array_equal(np.shape(x.random()), (2,)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, (False,)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), ()) + assert x.distribution.ndim_reps == 0 + + with Model() as model: + # Test independent terms and replication. + x = Normal('x', mu=np.arange(0, 2), tau=np.arange(1, 3), + size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test broadcasting among the independent terms. + x = Normal('x', mu=np.arange(0, 2), tau=1, size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + with Model() as model: + # Test broadcasting among the independent terms, where independent + # terms are determined by a non-default test value parameter. 
+ x = Normal('x', mu=0, tau=np.r_[1, 2], size=(3, 2)) + + # Check resulting objects' shapes + np.testing.assert_array_equal(x.distribution.shape, (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.tag.test_value), (3, 2, 2)) + np.testing.assert_array_equal(np.shape(x.random()), (3, 2, 2)) + + # Check shape info + np.testing.assert_array_equal(x.distribution.bcast, + (False, False, False)) + np.testing.assert_array_equal(x.distribution.shape_supp.eval(), ()) + assert x.distribution.ndim_supp == 0 + np.testing.assert_array_equal(x.distribution.shape_ind.eval(), (2,)) + assert x.distribution.ndim_ind == 1 + np.testing.assert_array_equal(x.distribution.shape_reps.eval(), (3, 2)) + assert x.distribution.ndim_reps == 2 + + +# TODO +# def test_multivariate_shape_info(): +# with Model() as model: +# x = MvNormal('x', np.ones((3,)), np.zeros((3, 3))) +# +# with Model() as model: +# x = MvNormal('x', np.ones((3,)), np.zeros((3,)), size=(3, 2)) +# +# with Model() as model: +# x = MvNormal('x', np.ones((3, 2)), np.zeros((3, 3))) +# +# with Model() as model: +# x = Dirichlet('x', np.r_[1, 1, 1], size=(3, 2)) +# +# with Model() as model: +# x = Multinomial('x', 2, np.r_[0.5, 0.5], size=(3, 2)) +# +# with Model() as model: +# x = Multinomial('x', np.r_[2, 2], np.r_[0.5, 0.5]) +# +# with Model() as model: +# x = Multinomial('x', 2, np.r_[[0.5, 0.5], [0.1, 0.9]]) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 7f18ea0958..d81cdefd68 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -118,19 +118,19 @@ def build_model(distfam, valuedomain, vardomains, extra_args={}): def integrate_nd(f, domain, shape, dtype): - if shape == () or shape == (1,): + if np.array_equal(shape, ()) or np.array_equal(shape, (1,)): if dtype in continuous_types: return integrate.quad(f, domain.lower, domain.upper, epsabs=1e-8)[0] else: return sum(f(j) for j in range(domain.lower, domain.upper + 1)) - elif shape == (2,): + elif np.array_equal(shape, (2,)): def f2(a, b): return f([a, b]) return integrate.dblquad(f2, domain.lower[0], domain.upper[0], lambda _: domain.lower[1], lambda _: domain.upper[1])[0] - elif shape == (3,): + elif np.array_equal(shape, (3,)): def f3(a, b, c): return f([a, b, c])
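The switch to np.array_equal in integrate_nd matters because shape objects may now arrive as arrays rather than tuples; a standalone illustration of the difference (illustrative values only)::

    import numpy as np

    shape_as_tuple = (2, 3)
    shape_as_array = np.array([2, 3])

    # Tuple comparison yields a single bool, but `==` on an array broadcasts
    # element-wise, so the result is ambiguous as a truth value.
    assert (shape_as_tuple == (2, 3)) is True
    assert (shape_as_array == (2, 3)).tolist() == [True, True]

    # np.array_equal collapses both representations to one scalar answer.
    assert np.array_equal(shape_as_tuple, (2, 3))
    assert np.array_equal(shape_as_array, (2, 3))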