From c07c7097b85c0a8e13e399821f44c1b257ce899c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 10 Mar 2021 23:27:54 -0600 Subject: [PATCH 01/16] Add fallback for missing value_var in BaseTrace --- pymc3/backends/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 1b9e589bc5..114b83246a 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -61,7 +61,7 @@ def __init__(self, name, model=None, vars=None, test_point=None): model = modelcontext(model) self.model = model if vars is None: - vars = [v.tag.value_var for v in model.unobserved_RVs] + vars = [getattr(v.tag, "value_var", v) for v in model.unobserved_RVs] self.vars = vars self.varnames = [var.name for var in vars] self.fn = model.fastfn(vars) From 34a66c7142ba18750f092bbeef1d64594ad1a75a Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 10 Mar 2021 23:27:22 -0600 Subject: [PATCH 02/16] Use name from Variable.name in pymc3.util.get_var_name --- pymc3/util.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pymc3/util.py b/pymc3/util.py index 247eb380f3..459968aa28 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -22,8 +22,6 @@ import numpy as np import xarray -from aesara.tensor.var import TensorVariable - from pymc3.exceptions import SamplingError LATEX_ESCAPE_RE = re.compile(r"(%|_|\$|#|&)", re.MULTILINE) @@ -169,10 +167,7 @@ def get_repr_for_variable(variable, formatting="plain"): def get_var_name(var): """Get an appropriate, plain variable name for a variable.""" - if isinstance(var, TensorVariable): - return super(TensorVariable, var).__str__() - else: - return str(var) + return getattr(var, "name", str(var)) def update_start_vals(a, b, model): From 6c8a2b30dc70f5bda24c8f0bdaf047c7c1c66673 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Tue, 9 Mar 2021 16:41:52 -0600 Subject: [PATCH 03/16] Reinstate log-likelihood transforms --- pymc3/backends/base.py | 13 +- pymc3/distributions/__init__.py | 211 ++++++++++++++++------------ pymc3/distributions/continuous.py | 48 ++++--- pymc3/distributions/distribution.py | 16 +-- pymc3/distributions/transforms.py | 77 ---------- pymc3/model.py | 80 +++++++---- pymc3/tests/test_transforms.py | 75 +++++----- 7 files changed, 259 insertions(+), 261 deletions(-) diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 114b83246a..866a9bbcdc 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -61,7 +61,18 @@ def __init__(self, name, model=None, vars=None, test_point=None): model = modelcontext(model) self.model = model if vars is None: - vars = [getattr(v.tag, "value_var", v) for v in model.unobserved_RVs] + vars = [] + for v in model.unobserved_RVs: + var = getattr(v.tag, "value_var", v) + transform = getattr(var.tag, "transform", None) + if transform: + # We need to create and add an un-transformed version of + # each transformed variable + untrans_var = transform.backward(var) + untrans_var.name = v.name + vars.append(untrans_var) + vars.append(var) + self.vars = vars self.varnames = [var.name for var in vars] self.fn = model.fastfn(vars) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index d33c63b073..bee51ca910 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
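[Note: patches 01 and 02 both swap a hard attribute lookup for a graceful fallback: the trace backend now falls back to the variable itself when no `value_var` has been tagged, and `get_var_name` simply prefers a variable's `name` attribute over its string representation. The following is a minimal, self-contained sketch of both fallbacks; `Tag` and `FakeVar` are hypothetical stand-ins for Aesara variables and their tag objects, not PyMC3 API.

    class Tag:
        pass

    class FakeVar:
        # Hypothetical stand-in for an Aesara variable with a `.tag`.
        def __init__(self, name, value_var=None):
            self.name = name
            self.tag = Tag()
            if value_var is not None:
                self.tag.value_var = value_var

    v_plain = FakeVar("x")
    v_valued = FakeVar("y", value_var=FakeVar("y_value"))

    # Mirrors `getattr(v.tag, "value_var", v)` from patch 01: use the tagged
    # value variable when present, otherwise the variable itself.
    assert getattr(v_plain.tag, "value_var", v_plain) is v_plain
    assert getattr(v_valued.tag, "value_var", v_valued).name == "y_value"

    # Mirrors the simplified `get_var_name` from patch 02.
    def get_var_name(var):
        return getattr(var, "name", str(var))

    assert get_var_name(v_plain) == "x"
    assert get_var_name(3.0) == "3.0"
]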
from functools import singledispatch +from itertools import chain from typing import Generator, List, Optional, Tuple, Union import aesara.tensor as aet @@ -31,6 +32,11 @@ ] +@singledispatch +def logp_transform(op, inputs): + return None + + def _get_scaling(total_size, shape, ndim): """ Gets scaling constant for logp @@ -135,7 +141,6 @@ def change_rv_size( def rv_log_likelihood_args( rv_var: TensorVariable, - rv_value: Optional[TensorVariable] = None, transformed: Optional[bool] = True, ) -> Tuple[TensorVariable, TensorVariable]: """Get a `RandomVariable` and its corresponding log-likelihood `TensorVariable` value. @@ -146,38 +151,24 @@ def rv_log_likelihood_args( A variable corresponding to a `RandomVariable`, whether directly or indirectly (e.g. an observed variable that's the output of an `Observed` `Op`). - rv_value - The measure-space input `TensorVariable` (i.e. "input" to a - log-likelihood). transformed When ``True``, return the transformed value var. Returns ======= The first value in the tuple is the `RandomVariable`, and the second is the - measure-space variable that corresponds with the latter. The first is used - to determine the log likelihood graph and the second is the "input" - parameter to that graph. In the case of an observed `RandomVariable`, the - "input" is actual data; in all other cases, it's just another - `TensorVariable`. + measure-space variable that corresponds with the latter (i.e. the "value" + variable). """ - if rv_value is None: - if rv_var.owner and isinstance(rv_var.owner.op, Observed): - rv_var, rv_value = rv_var.owner.inputs - elif hasattr(rv_var.tag, "value_var"): - rv_value = rv_var.tag.value_var - else: - return rv_var, None - - rv_value = aet.as_tensor_variable(rv_value) - - transform = getattr(rv_value.tag, "transform", None) - if transformed and transform: - rv_value = transform.forward(rv_value) - - return rv_var, rv_value + if rv_var.owner and isinstance(rv_var.owner.op, Observed): + return tuple(rv_var.owner.inputs) + elif hasattr(rv_var.tag, "value_var"): + rv_value = rv_var.tag.value_var + return rv_var, rv_value + else: + return rv_var, None def rv_ancestors(graphs: List[TensorVariable]) -> Generator[TensorVariable, None, None]: @@ -197,22 +188,53 @@ def strip_observed(x: TensorVariable) -> TensorVariable: return x -def sample_to_measure_vars(graphs: List[TensorVariable]) -> List[TensorVariable]: - """Replace `RandomVariable` terms in graphs with their measure-space counterparts.""" +def sample_to_measure_vars( + graphs: List[TensorVariable], +) -> Tuple[List[TensorVariable], List[TensorVariable]]: + """Replace sample-space variables in graphs with their measure-space counterparts. + + Sample-space variables are `TensorVariable` outputs of `RandomVariable` + `Op`s. Measure-space variables are `TensorVariable`s that correspond to + the value of a sample-space variable in a likelihood function (e.g. ``x`` + in ``p(X = x)``, where ``X`` is the corresponding sample-space variable). + (``x`` is also the variable found in ``rv_var.tag.value_var``, so this + function could also be called ``sample_to_value_vars``.) + + Parameters + ========== + graphs + The graphs in which random variables are to be replaced by their + measure variables. + + Returns + ======= + Tuple containing the transformed graphs and a ``dict`` of the replacements + that were made. 
+ """ replace = {} - for anc in rv_ancestors(graphs): - measure_var = getattr(anc.tag, "value_var", None) - if measure_var is not None: - replace[anc] = measure_var + for anc in chain(rv_ancestors(graphs), graphs): + + if not (anc.owner and isinstance(anc.owner.op, RandomVariable)): + continue + + _, value_var = rv_log_likelihood_args(anc) + + if value_var is not None: + replace[anc] = value_var + + if replace: + measure_graphs = clone_replace(graphs, replace=replace) + else: + measure_graphs = graphs - dist_params = clone_replace(graphs, replace=replace) - return dist_params + return measure_graphs, replace def logpt( rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, - jacobian: bool = True, + jacobian: Optional[bool] = True, + transformed: Optional[bool] = True, scaling: Optional[bool] = True, **kwargs, ) -> TensorVariable: @@ -228,15 +250,26 @@ def logpt( rv_var The `RandomVariable` output that determines the log-likelihood graph. rv_value - The input variable for the log-likelihood graph. + The input variable for the log-likelihood graph. If `rv_value` is + a transformed variable, its transformations will be applied. + If no value is provided, `rv_var.tag.value_var` will be checked and, + when available, used. jacobian Whether or not to include the Jacobian term. + transformed + Return the transformed version of the log-likelihood graph. scaling A scaling term to apply to the generated log-likelihood graph. """ - rv_var, rv_value = rv_log_likelihood_args(rv_var, rv_value) + rv_var, rv_value_var = rv_log_likelihood_args(rv_var) + + if rv_value is None: + rv_value = rv_value_var + else: + rv_value = aet.as_tensor(rv_value) + rv_node = rv_var.owner if not rv_node: @@ -244,13 +277,13 @@ def logpt( if not isinstance(rv_node.op, RandomVariable): + # This will probably need another generic function... 
if isinstance(rv_node.op, (Subtensor, AdvancedSubtensor, AdvancedSubtensor1)): raise NotImplementedError("Missing value support is incomplete") # "Flatten" and sum an array of indexed RVs' log-likelihoods rv_var, missing_values = rv_node.inputs - rv_value = rv_var.tag.value_var missing_values = missing_values.data logp_var = aet.sum( @@ -268,28 +301,40 @@ def logpt( return aet.zeros_like(rv_var) + if rv_value_var is None: + raise NotImplementedError(f"The log-likelihood for {rv_var} is undefined") + + # This case should be reached when `rv_var` is either the result of an + # `Observed` or a `RandomVariable` `Op` rng, size, dtype, *dist_params = rv_node.inputs - dist_params = sample_to_measure_vars(dist_params) + dist_params, replacements = sample_to_measure_vars(dist_params) - if jacobian: - logp_var = _logp(rv_node.op, rv_value, *dist_params, **kwargs) - else: - logp_var = _logp_nojac(rv_node.op, rv_value, *dist_params, **kwargs) + logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs) - # Replace `RandomVariable` ancestors with their corresponding - # log-likelihood input variables - lik_replacements = [ - (v, v.tag.value_var) - for v in ancestors([logp_var]) - if v.owner and isinstance(v.owner.op, RandomVariable) and getattr(v.tag, "value_var", None) - ] + # If any of the measure vars are transformed measure-space variables + # (signified by having a `transform` value in their tags), then we apply + # the their transforms and add their Jacobians (when enabled) + if transformed: + logp_var = transform_logp( + logp_var, + tuple(replacements.values()) + (rv_value_var,), + ) + + transform = getattr(rv_value_var.tag, "transform", None) + + if transform and jacobian: + transformed_jacobian = transform.jacobian_det(rv_value_var) + if transformed_jacobian: + if logp_var.ndim > transformed_jacobian.ndim: + logp_var = logp_var.sum(axis=-1) + logp_var += transformed_jacobian - (logp_var,) = clone_replace([logp_var], replace=lik_replacements) + (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value}) if scaling: logp_var *= _get_scaling( - getattr(rv_var.tag, "total_size", None), rv_value.shape, rv_value.ndim + getattr(rv_var.tag, "total_size", None), rv_value_var.shape, rv_value_var.ndim ) if rv_var.name is not None: @@ -298,6 +343,25 @@ def logpt( return logp_var +def transform_logp(logp_var: TensorVariable, inputs: List[TensorVariable]) -> TensorVariable: + """Transform the inputs of a log-likelihood graph.""" + trans_replacements = {} + for measure_var in inputs: + + transform = getattr(measure_var.tag, "transform", None) + + if transform is None: + continue + + trans_rv_value = transform.backward(measure_var) + trans_replacements[measure_var] = trans_rv_value + + if trans_replacements: + (logp_var,) = clone_replace([logp_var], trans_replacements) + + return logp_var + + @singledispatch def _logp(op, value, *dist_params, **kwargs): """Create a log-likelihood graph. 
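[Note: the `@singledispatch` definition of `_logp` just above is the central extension point of this refactor: the log-density implementation is selected by the type of the `RandomVariable` `Op` that produced the variable. Below is a self-contained sketch of that dispatch mechanism; `NormalRVOp` is a hypothetical stand-in for an Aesara op class, and plain-float math replaces the tensor graph.

    import math
    from functools import singledispatch

    class NormalRVOp:
        # Hypothetical stand-in for an Aesara `RandomVariable` op class.
        pass

    @singledispatch
    def _logp(op, value, *dist_params, **kwargs):
        raise NotImplementedError(f"No logp registered for {type(op).__name__}")

    @_logp.register(NormalRVOp)
    def normal_logp(op, value, mu, sigma):
        # Plain-float normal log-density in place of an Aesara graph.
        return (
            -0.5 * ((value - mu) / sigma) ** 2
            - math.log(sigma)
            - 0.5 * math.log(2 * math.pi)
        )

    # Dispatch is driven by the op instance, as in `logpt` above.
    assert abs(_logp(NormalRVOp(), 0.0, 0.0, 1.0) + 0.9189385) < 1e-6
]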
@@ -310,10 +374,10 @@ def _logp(op, value, *dist_params, **kwargs): return aet.zeros_like(value) -def logcdf(rv_var, rv_value, **kwargs): +def logcdf(rv_var, rv_value, transformed=True, jacobian=True, **kwargs): """Create a log-CDF graph.""" - rv_var, rv_value = rv_log_likelihood_args(rv_var, rv_value) + rv_var, rv_value = rv_log_likelihood_args(rv_var) rv_node = rv_var.owner if not rv_node: @@ -321,9 +385,16 @@ def logcdf(rv_var, rv_value, **kwargs): rng, size, dtype, *dist_params = rv_node.inputs - dist_params = sample_to_measure_vars(dist_params) + dist_params, replacements = sample_to_measure_vars(dist_params) + + logp_var = _logcdf(rv_node.op, rv_value, *dist_params, **kwargs) - return _logcdf(rv_node.op, rv_value, *dist_params, **kwargs) + if transformed: + logp_var = transform_logp( + logp_var, tuple(replacements.values()) + (rv_value,), jacobian=jacobian + ) + + return logp_var @singledispatch @@ -338,38 +409,6 @@ def _logcdf(op, value, *args, **kwargs): raise NotImplementedError() -def logp_nojac(rv_var, rv_value=None, **kwargs): - """Create a graph of the log-likelihood that doesn't include the Jacobian.""" - - rv_var, rv_value = rv_log_likelihood_args(rv_var, rv_value) - rv_node = rv_var.owner - - if not rv_node: - raise TypeError() - - rng, size, dtype, *dist_params = rv_node.inputs - - dist_params = sample_to_measure_vars(dist_params) - - return _logp_nojac(rv_node.op, rv_value, **kwargs) - - -@singledispatch -def _logp_nojac(op, value, *args, **kwargs): - """Return the logp, but do not include a jacobian term for transforms. - - If we use different parametrizations for the same distribution, we - need to add the determinant of the jacobian of the transformation - to make sure the densities still describe the same distribution. - However, MAP estimates are not invariant with respect to the - parameterization, we need to exclude the jacobian terms in this case. - - This function should be overwritten in base classes for transformed - distributions. - """ - return logpt(op, value, *args, **kwargs) - - def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, **kwargs): """Return the sum of the logp values for the given observations. 
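[Note: `transform_logp` and the Jacobian branch in `logpt` above implement the standard change-of-variables rule: evaluate the base density at `transform.backward(y)`, then add the log-determinant of the Jacobian. A toy numeric sketch for the `log` transform, whose `jacobian_det(y)` is simply `y` (since its backward map is `exp`); `expon_logpdf` is an illustrative density written with plain floats, not PyMC3 code.

    import math

    def expon_logpdf(x, lam=1.5):
        # Log-density of Exponential(lam) on the positive reals.
        return math.log(lam) - lam * x if x > 0 else -math.inf

    def transformed_logpdf(y, lam=1.5):
        x = math.exp(y)   # transform.backward(y) for the `log` transform
        jac_det = y       # transform.jacobian_det(y) for the `log` transform
        return expon_logpdf(x, lam) + jac_det

    # The transformed density integrates to one over the unconstrained
    # space (quick left-Riemann check over y in [-10, 10]).
    step = 1e-3
    total = sum(
        math.exp(transformed_logpdf(-10 + i * step)) * step
        for i in range(20000)
    )
    assert abs(total - 1.0) < 1e-3
]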
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 6bd64540d0..65c7fee2c0 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -35,7 +35,7 @@ from scipy.interpolate import InterpolatedUnivariateSpline from pymc3.aesaraf import floatX -from pymc3.distributions import _logcdf, _logp, transforms +from pymc3.distributions import _logcdf, _logp, logp_transform, transforms from pymc3.distributions.dist_math import ( SplineWrapper, betaln, @@ -100,36 +100,41 @@ class PositiveContinuous(Continuous): """Base class for positive continuous distributions""" - default_transform = transforms.log - class UnitContinuous(Continuous): """Base class for continuous distributions on [0,1]""" - default_transform = transforms.logodds - class BoundedContinuous(Continuous): """Base class for bounded continuous distributions""" - default_transform = "auto" - def create_transform(transform="auto", lower=None, upper=None): +@logp_transform.register(PositiveContinuous) +def pos_cont_transform(op, inputs): + return transforms.log - lower = aet.as_tensor_variable(lower) if lower is not None else None - upper = aet.as_tensor_variable(upper) if upper is not None else None - if transform == "auto": - if lower is None and upper is None: - transform = None - elif lower is not None and upper is None: - transform = transforms.lowerbound(lower) - elif lower is None and upper is not None: - transform = transforms.upperbound(upper) - else: - transform = transforms.interval(lower, upper) +@logp_transform.register(UnitContinuous) +def unit_cont_transform(op, inputs): + return transforms.logodds + + +@logp_transform.register(BoundedContinuous) +def bounded_cont_transform(op, inputs): + _, _, _, lower, upper = inputs + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None + + if lower is None and upper is None: + transform = None + elif lower is not None and upper is None: + transform = transforms.lowerbound(lower) + elif lower is None and upper is not None: + transform = transforms.upperbound(upper) + else: + transform = transforms.interval(lower, upper) - return transform + return transform def assert_negative_support(var, label, distname, value=-1e-6): @@ -229,11 +234,10 @@ def dist(cls, lower=0, upper=1, **kwargs): upper = aet.as_tensor_variable(floatX(upper)) # mean = (upper + lower) / 2.0 # median = self.mean + return super().dist([lower, upper], **kwargs) - transform = kwargs.pop("transform", cls.default_transform) - transform = cls.create_transform(transform, lower, upper) - return super().dist([lower, upper], transform=transform, **kwargs) +BoundedContinuous.register(UniformRV) @_logp.register(UniformRV) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 3265451b11..098662b28e 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -19,6 +19,7 @@ import types import warnings +from abc import ABC from typing import TYPE_CHECKING import dill @@ -57,11 +58,10 @@ class _Unpickling: pass -class Distribution: +class Distribution(ABC): """Statistical distribution""" rv_op = None - default_transform = None def __new__(cls, name, *args, **kwargs): try: @@ -93,19 +93,19 @@ def __new__(cls, name, *args, **kwargs): if "shape" in kwargs: raise DeprecationWarning("The `shape` keyword is deprecated; use `size`.") + transform = kwargs.pop("transform", None) + rv_out = cls.dist(*args, rng=rng, **kwargs) - 
return model.register_rv(rv_out, name, data, total_size, dims=dims) + return model.register_rv(rv_out, name, data, total_size, dims=dims, transform=transform) @classmethod def dist(cls, dist_params, **kwargs): - transform = kwargs.pop("transform", cls.default_transform) + testval = kwargs.pop("testval", None) rv_var = cls.rv_op(*dist_params, **kwargs) - rv_var.tag.transform = transform - if testval is not None: rv_var.tag.test_value = testval @@ -223,7 +223,6 @@ def __init__( dtype, testval=None, defaults=(), - transform=None, parent_dist=None, *args, **kwargs, @@ -270,9 +269,6 @@ def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs): if dtype != "int16" and dtype != "int64": raise TypeError("Discrete classes expect dtype to be int16 or int64.") - if kwargs.get("transform", None) is not None: - raise ValueError("Transformations for discrete distributions " "are not allowed.") - super().__init__(shape, dtype, defaults=defaults, *args, **kwargs) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 163b9ba872..3aa6b81ec0 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -18,15 +18,12 @@ import numpy as np from aesara.tensor.subtensor import advanced_set_subtensor1 -from aesara.tensor.type import TensorType from pymc3.aesaraf import floatX, gradient -from pymc3.distributions import distribution from pymc3.math import invlogit, logit, logsumexp __all__ = [ "Transform", - "transform", "stick_breaking", "logodds", "interval", @@ -112,80 +109,6 @@ def jacobian_det(self, x): return aet.log(aet.abs_(grad)) -class TransformedDistribution(distribution.Distribution): - """A distribution that has been transformed from one space into another.""" - - def __init__(self, dist, transform, *args, **kwargs): - """ - Parameters - ---------- - dist: Distribution - transform: Transform - args, kwargs - arguments to Distribution""" - forward = transform.forward - testval = forward(dist.default()) - - self.dist = dist - self.transform_used = transform - # XXX: `FreeRV` no longer exists - v = None # forward(FreeRV(name="v", distribution=dist)) - self.type = v.type - - super().__init__(v.shape.tag.test_value, v.dtype, testval, dist.defaults, *args, **kwargs) - - if transform.name == "stickbreaking": - b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) - # force the last dim not broadcastable - self.type = TensorType(v.dtype, b) - - def logp(self, x): - """ - Calculate log-probability of Transformed distribution at specified value. - - Parameters - ---------- - x: numeric - Value for which log-probability is calculated. - - Returns - ------- - TensorVariable - """ - logp_nojac = self.logp_nojac(x) - jacobian_det = self.transform_used.jacobian_det(x) - if logp_nojac.ndim > jacobian_det.ndim: - logp_nojac = logp_nojac.sum(axis=-1) - return logp_nojac + jacobian_det - - def logp_nojac(self, x): - """ - Calculate log-probability of Transformed distribution at specified value - without jacobian term for transforms. - - Parameters - ---------- - x: numeric - Value for which log-probability is calculated. 
- - Returns - ------- - TensorVariable - """ - return self.dist.logp(self.transform_used.backward(x)) - - def _repr_latex_(self, **kwargs): - # prevent TransformedDistributions from ending up in LaTeX representations - # of models - return None - - def _distr_parameters_for_repr(self): - return [] - - -transform = Transform - - class Log(ElemwiseTransform): name = "log" diff --git a/pymc3/model.py b/pymc3/model.py index da683ad4f5..9599f3b288 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -29,7 +29,7 @@ from aesara.compile.sharedvalue import SharedVariable from aesara.gradient import grad -from aesara.graph.basic import Variable +from aesara.graph.basic import Constant, Variable, graph_inputs from aesara.tensor.random.op import Observed, observed from aesara.tensor.var import TensorVariable from pandas import Series @@ -39,7 +39,7 @@ from pymc3.aesaraf import generator, gradient, hessian, inputvars from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.data import GenTensorVariable, Minibatch -from pymc3.distributions import change_rv_size, logpt, logpt_sum +from pymc3.distributions import change_rv_size, logp_transform, logpt, logpt_sum from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list from pymc3.memoize import WithMemoization @@ -893,7 +893,7 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): with self: free_RVs_logp = aet.sum( [ - aet.sum(logpt(var, getattr(var.tag, "value_var", None))) + aet.sum(logpt(var, getattr(var.tag, "value_var", None), transformed=True)) for var in self.free_RVs + self.potentials ] ) @@ -902,15 +902,19 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): costs = [free_RVs_logp, observed_RVs_logp] else: costs = [self.logpt] - varnames = [var.name for var in grad_vars] - extra_vars = [var for var in self.free_RVs if var.name not in varnames] + + input_vars = {i for i in graph_inputs(costs) if not isinstance(i, Constant)} + extra_vars = [var for var in self.free_RVs if var in input_vars] return ValueGradFunction(costs, grad_vars, extra_vars, **kwargs) @property def logpt(self): """Aesara scalar of log-probability of the model""" with self: - factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] + factors = [ + logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True) + for var in self.free_RVs + ] factors += [logpt_sum(obs) for obs in self.observed_RVs] factors += self.potentials logp_var = aet.sum([aet.sum(factor) for factor in factors]) @@ -924,12 +928,15 @@ def logpt(self): def logp_nojact(self): """Aesara scalar of log-probability of the model but without the jacobian if transformed Random Variable is presented. - Note that If there is no transformed variable in the model, logp_nojact + + Note that if there is no transformed variable in the model, logp_nojact will be the same as logpt as there is no need for Jacobian correction. 
""" with self: factors = [ - logpt_sum(var, getattr(var.tag, "value_var", None), jacobian=False) + logpt_sum( + var, getattr(var.tag, "value_var", None), jacobian=False, transformed=True + ) for var in self.free_RVs ] factors += [logpt_sum(obs, jacobian=False) for obs in self.observed_RVs] @@ -946,7 +953,10 @@ def varlogpt(self): """Aesara scalar of log-probability of the unobserved random variables (excluding deterministic).""" with self: - factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] + factors = [ + logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True) + for var in self.free_RVs + ] return aet.sum(factors) @property @@ -1000,7 +1010,7 @@ def independent_vars(self): @property def test_point(self): """Test point used to check that the model doesn't generate errors""" - return Point(((var.tag.value_var, var.tag.test_value) for var in self.free_RVs), model=self) + return Point(((var, var.tag.test_value) for var in self.vars), model=self) @property def disc_vars(self): @@ -1042,7 +1052,7 @@ def add_coords(self, coords): else: self.coords[name] = coords[name] - def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): + def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, transform=None): """Register an (un)observed random variable with the model. Parameters @@ -1050,11 +1060,11 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): rv_var: TensorVariable name: str data: array_like (optional) - If data is provided, the variable is observed. If None, - the variable is unobserved. + If data is provided, the variable is observed. If None, + the variable is unobserved. total_size: scalar upscales logp of variable with ``coef = total_size/var.shape[0]`` - dims : tuple + dims: tuple Dimension names for the variable. Returns @@ -1074,17 +1084,24 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): # In all other cases, the role of the value variable is taken by # observed data. That's why value variables are only referenced in # this branch of the conditional. - value_var = rv_var.clone() - value_var.name = rv_var.name - rv_var.tag.value_var = value_var - self.free_RVs.append(rv_var) + value_var = rv_var.clone() - transform = rv_var.tag.transform - value_var.tag.transform = None - + transform = transform or logp_transform(rv_var.owner.op, rv_var.owner.inputs) if transform is not None: - self.deterministics.append(rv_var) + value_var.tag.transform = transform + value_var.name = f"{rv_var.name}_{transform.name}" + if aesara.config.compute_test_value != "off": + value_var.tag.test_value = transform.forward(value_var).tag.test_value + + # The transformed variable needs to be a named variable in the + # model, too + self.named_vars[value_var.name] = value_var + else: + value_var = rv_var.clone() + value_var.name = rv_var.name + + rv_var.tag.value_var = value_var elif isinstance(data, dict): @@ -1178,7 +1195,7 @@ def __getitem__(self, key): except KeyError: raise e - def makefn(self, outs, mode=None, *args, **kwargs): + def makefn(self, outs, mode=None, transformed=True, *args, **kwargs): """Compiles a Aesara function which returns ``outs`` and takes the variable ancestors of ``outs`` as inputs. 
@@ -1192,8 +1209,11 @@ def makefn(self, outs, mode=None, *args, **kwargs): Compiled Aesara function """ with self: + vars = [ + v if not transformed else getattr(v.tag, "transformed_var", v) for v in self.vars + ] return aesara.function( - self.vars, + vars, outs, allow_input_downcast=True, on_unused_input="ignore", @@ -1463,7 +1483,7 @@ def fastfn(outs, mode=None, model=None): return model.fastfn(outs, mode) -def Point(*args, filter_model_vars=True, **kwargs): +def Point(*args, filter_model_vars=False, **kwargs): """Build a point. Uses same args as dict() does. Filters out variables not in the model. All keys are strings. @@ -1608,6 +1628,13 @@ def make_obs_var( # We try to reuse the old test value rv_var.tag.test_value = np.broadcast_to(test_value, rv_var.tag.test_value.shape) + # An independent variable used as the generic log-likelihood input + # parameter (i.e. the measure-space counterpart to the sample-space + # variable `rv_var`). + value_var = rv_var.clone() + rv_var.tag.value_var = value_var + value_var.name = f"{rv_var.name}" + missing_values = None mask = getattr(data, "mask", None) if mask is not None: @@ -1623,9 +1650,7 @@ def make_obs_var( data = aet.set_subtensor(constant[mask.nonzero()], missing_values) # Now, we need log-likelihood-space terms for these missing values - value_var = rv_var.clone() value_var.name = f"{rv_var.name}_missing" - rv_var.tag.value_var = value_var elif sps.issparse(data): data = sparse.basic.as_sparse(data, name=name) @@ -1713,7 +1738,6 @@ def Potential(name, var, model=None): model = modelcontext(model) var.name = model.name_for(name) var.tag.scaling = None - var.tag.transform = None model.potentials.append(var) model.add_random_variable(var) return var diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index 740da7d129..a71af261f3 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -23,6 +23,7 @@ import pymc3.distributions.transforms as tr from pymc3.aesaraf import jacobian +from pymc3.distributions import logpt from pymc3.tests.checks import close_to, close_to_logical from pymc3.tests.helpers import SeededTest from pymc3.tests.test_distributions import ( @@ -209,7 +210,7 @@ def test_interval_near_boundary(): pm.Uniform("x", testval=x0, lower=lb, upper=ub) log_prob = model.check_test_point() - np.testing.assert_allclose(log_prob.values, np.array([-52.68])) + np.testing.assert_allclose(log_prob, np.array([-52.68])) def test_circular(): @@ -246,42 +247,40 @@ def test_chain(): class TestElementWiseLogp(SeededTest): - def build_model(self, distfam, params, shape, transform, testval=None): + def build_model(self, distfam, params, size, transform, testval=None): if testval is not None: testval = pm.floatX(testval) with pm.Model() as m: - distfam("x", shape=shape, transform=transform, testval=testval, **params) + distfam("x", size=size, transform=transform, testval=testval, **params) return m def check_transform_elementwise_logp(self, model): - x0 = model.deterministics[0] x = model.free_RVs[0] - assert x.ndim == x.logp_elemwiset.ndim + x0 = x.tag.value_var + assert x.ndim == logpt(x).ndim pt = model.test_point - array = np.random.randn(*pt[x.name].shape) - pt[x.name] = array - dist = x.distribution - logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) - jacob_det = dist.transform_used.jacobian_det(aesara.shared(array)) - assert x.logp_elemwiset.ndim == jacob_det.ndim + array = np.random.randn(*pt[x0.name].shape) + transform = x0.tag.transform + logp_nojac = logpt(x, 
transform.backward(array), jacobian=False) + jacob_det = transform.jacobian_det(aesara.shared(array)) + assert logpt(x).ndim == jacob_det.ndim elementwiselogp = logp_nojac + jacob_det - close_to(x.logp_elemwise(pt), elementwiselogp.eval(), tol) + close_to(logpt(x, array).eval(), elementwiselogp.eval(), tol) def check_vectortransform_elementwise_logp(self, model, vect_opt=0): - x0 = model.deterministics[0] x = model.free_RVs[0] - assert (x.ndim - 1) == x.logp_elemwiset.ndim + x0 = x.tag.value_var + assert (x.ndim - 1) == logpt(x).ndim pt = model.test_point - array = np.random.randn(*pt[x.name].shape) - pt[x.name] = array - dist = x.distribution - logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) - jacob_det = dist.transform_used.jacobian_det(aesara.shared(array)) - assert x.logp_elemwiset.ndim == jacob_det.ndim + array = np.random.randn(*pt[x0.name].shape) + transform = x0.tag.transform + logp_nojac = logpt(x, transform.backward(array)) + jacob_det = transform.jacobian_det(aesara.shared(array)) + assert logpt(x).ndim == jacob_det.ndim if vect_opt == 0: # the original distribution is univariate @@ -289,7 +288,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): else: elementwiselogp = logp_nojac + jacob_det # Hack to get relative tolerance - a = x.logp_elemwise(pt) + a = logpt(x, array).eval() b = elementwiselogp.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) @@ -301,13 +300,15 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): (np.ones(3) * 10.0, (4, 3)), ], ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_half_normal(self, sd, shape): - model = self.build_model(pm.HalfNormal, {"sd": sd}, shape=shape, transform=tr.log) + model = self.build_model(pm.HalfNormal, {"sd": sd}, size=shape, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize("lam,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_exponential(self, lam, shape): - model = self.build_model(pm.Exponential, {"lam": lam}, shape=shape, transform=tr.log) + model = self.build_model(pm.Exponential, {"lam": lam}, size=shape, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( @@ -318,10 +319,9 @@ def test_exponential(self, lam, shape): (np.ones(3), np.ones(3), (4, 3)), ], ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_beta(self, a, b, shape): - model = self.build_model( - pm.Beta, {"alpha": a, "beta": b}, shape=shape, transform=tr.logodds - ) + model = self.build_model(pm.Beta, {"alpha": a, "beta": b}, size=shape, transform=tr.logodds) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( @@ -335,16 +335,17 @@ def test_beta(self, a, b, shape): def test_uniform(self, lower, upper, shape): interval = tr.Interval(lower, upper) model = self.build_model( - pm.Uniform, {"lower": lower, "upper": upper}, shape=shape, transform=interval + pm.Uniform, {"lower": lower, "upper": upper}, size=shape, transform=interval ) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( "mu,kappa,shape", [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))] ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_vonmises(self, mu, kappa, shape): model = self.build_model( - pm.VonMises, {"mu": mu, "kappa": kappa}, shape=shape, transform=tr.circular + pm.VonMises, {"mu": mu, "kappa": kappa}, size=shape, transform=tr.circular ) 
self.check_transform_elementwise_logp(model) @@ -352,14 +353,14 @@ def test_vonmises(self, mu, kappa, shape): "a,shape", [(np.ones(2), 2), (np.ones((2, 3)) * 0.5, (2, 3)), (np.ones(3), (4, 3))] ) def test_dirichlet(self, a, shape): - model = self.build_model(pm.Dirichlet, {"a": a}, shape=shape, transform=tr.stick_breaking) + model = self.build_model(pm.Dirichlet, {"a": a}, size=shape, transform=tr.stick_breaking) self.check_vectortransform_elementwise_logp(model, vect_opt=1) def test_normal_ordered(self): model = self.build_model( pm.Normal, {"mu": 0.0, "sd": 1.0}, - shape=3, + size=3, testval=np.asarray([-1.0, 1.0, 4.0]), transform=tr.ordered, ) @@ -378,7 +379,7 @@ def test_half_normal_ordered(self, sd, shape): model = self.build_model( pm.HalfNormal, {"sd": sd}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.log, tr.ordered]), ) @@ -390,7 +391,7 @@ def test_exponential_ordered(self, lam, shape): model = self.build_model( pm.Exponential, {"lam": lam}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.log, tr.ordered]), ) @@ -408,7 +409,7 @@ def test_beta_ordered(self, a, b, shape): model = self.build_model( pm.Beta, {"alpha": a, "beta": b}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.logodds, tr.ordered]), ) @@ -424,7 +425,7 @@ def test_uniform_ordered(self, lower, upper, shape): model = self.build_model( pm.Uniform, {"lower": lower, "upper": upper}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([interval, tr.ordered]), ) @@ -438,7 +439,7 @@ def test_vonmises_ordered(self, mu, kappa, shape): model = self.build_model( pm.VonMises, {"mu": mu, "kappa": kappa}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.circular, tr.ordered]), ) @@ -457,7 +458,7 @@ def test_uniform_other(self, lower, upper, shape, transform): model = self.build_model( pm.Uniform, {"lower": lower, "upper": upper}, - shape=shape, + size=shape, testval=testval, transform=transform, ) @@ -473,6 +474,6 @@ def test_uniform_other(self, lower, upper, shape, transform): def test_mvnormal_ordered(self, mu, cov, shape): testval = np.sort(np.random.randn(*shape)) model = self.build_model( - pm.MvNormal, {"mu": mu, "cov": cov}, shape=shape, testval=testval, transform=tr.ordered + pm.MvNormal, {"mu": mu, "cov": cov}, size=shape, testval=testval, transform=tr.ordered ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) From 71301933668586529f5d216876a1be88a300ce07 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Sat, 13 Mar 2021 23:28:17 -0600 Subject: [PATCH 04/16] Remove remaining v3 sampling code --- pymc3/distributions/posterior_predictive.py | 699 -------------------- pymc3/tests/test_data_container.py | 4 +- pymc3/tests/test_distributions_random.py | 2 +- pymc3/tests/test_modelcontext.py | 22 - pymc3/tests/test_posterior_predictive.py | 39 -- pymc3/tests/test_random.py | 187 ------ 6 files changed, 3 insertions(+), 950 deletions(-) delete mode 100644 pymc3/distributions/posterior_predictive.py delete mode 100644 pymc3/tests/test_posterior_predictive.py delete mode 100644 pymc3/tests/test_random.py diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py deleted file mode 100644 index 1125ae9357..0000000000 --- a/pymc3/distributions/posterior_predictive.py +++ /dev/null @@ -1,699 +0,0 @@ -from __future__ import annotations - -import contextvars -import logging -import numbers -import warnings - -from collections import UserDict -from contextlib import AbstractContextManager -from typing import TYPE_CHECKING, Any, Callable, Dict, List, cast, overload - -import aesara.graph.basic -import aesara.graph.fg -import numpy as np - -from aesara.compile.sharedvalue import SharedVariable -from aesara.graph.basic import Constant -from aesara.tensor.var import TensorVariable -from arviz import InferenceData -from typing_extensions import Literal, Protocol -from xarray import Dataset - -from pymc3.backends.base import MultiTrace -from pymc3.distributions.distribution import ( - _compile_aesara_function, - _DrawValuesContext, - _DrawValuesContextBlocker, - is_fast_drawable, - vectorized_ppc, -) -from pymc3.exceptions import IncorrectArgumentsError -from pymc3.model import ( - Model, - MultiObservedRV, - ObservedRV, - get_named_nodes_and_relations, - modelcontext, -) -from pymc3.util import chains_and_samples, dataset_to_point_list, get_var_name - -# Failing tests: -# test_mixture_random_shape::test_mixture_random_shape -# - -Point = Dict[str, np.ndarray] - - -class HasName(Protocol): - name: str - - -class _TraceDict(UserDict): - """This class extends the standard trace-based representation - of traces by adding some helpful attributes used in posterior predictive - sampling. - - Attributes - ~~~~~~~~~~ - varnames: list of strings""" - - varnames: list[str] - _len: int - data: Point - - def __init__( - self, - point_list: list[Point] | None = None, - multi_trace: MultiTrace | None = None, - dict_: Point | None = None, - ): - """""" - if multi_trace: - assert point_list is None and dict_ is None - self.data = {} - self._len = sum(len(multi_trace._straces[chain]) for chain in multi_trace.chains) - self.varnames = multi_trace.varnames - for vn in multi_trace.varnames: - self.data[vn] = multi_trace.get_values(vn) - if point_list is not None: - assert multi_trace is None and dict_ is None - self.varnames = varnames = list(point_list[0].keys()) - rep_values = [point_list[0][varname] for varname in varnames] - # translate the point list. - self._len = num_points = len(point_list) - - def arr_for(val): - if np.isscalar(val): - return np.ndarray(shape=(num_points,)) - elif isinstance(val, np.ndarray): - shp = (num_points,) + val.shape - return np.ndarray(shape=shp) - else: - raise TypeError( - "Illegal object %s of type %s as value of variable in point list." 
- % (val, type(val)) - ) - - self.data = {name: arr_for(val) for name, val in zip(varnames, rep_values)} - for i, point in enumerate(point_list): - for var, value in point.items(): - self.data[var][i] = value - if dict_ is not None: - assert point_list is None and multi_trace is None - self.data = dict_ - self.varnames = list(dict_.keys()) - self._len = dict_[self.varnames[0]].shape[0] - assert self.varnames is not None and self._len is not None and self.data is not None - - def __len__(self) -> int: - return self._len - - def _extract_slice(self, slc: slice) -> _TraceDict: - sliced_dict: Point = {} - - def apply_slice(arr: np.ndarray) -> np.ndarray: - if len(arr.shape) == 1: - return arr[slc] - else: - return arr[slc, :] - - for vn, arr in self.data.items(): - sliced_dict[vn] = apply_slice(arr) - return _TraceDict(dict_=sliced_dict) - - @overload - def __getitem__(self, item: str | HasName) -> np.ndarray: - ... - - @overload - def __getitem__(self, item: slice | int) -> _TraceDict: - ... - - def __getitem__(self, item): - if isinstance(item, str): - return super().__getitem__(item) - elif isinstance(item, slice): - return self._extract_slice(item) - elif isinstance(item, int): - return _TraceDict(dict_={k: np.atleast_1d(v[item]) for k, v in self.data.items()}) - elif hasattr(item, "name"): - return super().__getitem__(item.name) - else: - raise IndexError("Illegal index %s for _TraceDict" % str(item)) - - -def fast_sample_posterior_predictive( - trace: MultiTrace | Dataset | InferenceData | list[dict[str, np.ndarray]], - samples: int | None = None, - model: Model | None = None, - var_names: list[str] | None = None, - keep_size: bool = False, - random_seed=None, -) -> dict[str, np.ndarray]: - """Generate posterior predictive samples from a model given a trace. - - This is a vectorized alternative to the standard ``sample_posterior_predictive`` function. - It aims to be as compatible as possible with the original API, and is significantly - faster. Both posterior predictive sampling functions have some remaining issues, and - we encourage users to verify agreement across the results of both functions for the time - being. - - Parameters - ---------- - trace: MultiTrace, xarray.Dataset, InferenceData, or List of points (dictionary) - Trace generated from MCMC sampling. - samples: int, optional - Number of posterior predictive samples to generate. Defaults to one posterior predictive - sample per posterior sample, that is, the number of draws times the number of chains. It - is not recommended to modify this value; when modified, some chains may not be represented - in the posterior predictive sample. - model: Model (optional if in `with` context) - Model used to generate `trace` - var_names: Iterable[str] - List of vars to sample. - keep_size: bool, optional - Force posterior predictive sample to have the same shape as posterior and sample stats - data: ``(nchains, ndraws, ...)``. - random_seed: int - Seed for the random number generator. - - Returns - ------- - samples: dict - Dictionary with the variable names as keys, and values numpy arrays containing - posterior predictive samples. - """ - - ### Implementation note: primarily this function canonicalizes the arguments: - ### Establishing the model context, wrangling the number of samples, - ### Canonicalizing the trace argument into a _TraceDict object and fitting it - ### to the requested number of samples. Then it invokes posterior_predictive_draw_values - ### *repeatedly*. 
It does this repeatedly, because the trace argument is set up to be - ### the same as the number of samples. So if the number of samples requested is - ### greater than the number of samples in the trace parameter, we sample repeatedly. This - ### makes the shape issues just a little easier to deal with. - - if isinstance(trace, InferenceData): - nchains, ndraws = chains_and_samples(trace) - trace = dataset_to_point_list(trace.posterior) - elif isinstance(trace, Dataset): - nchains, ndraws = chains_and_samples(trace) - trace = dataset_to_point_list(trace) - elif isinstance(trace, MultiTrace): - nchains = trace.nchains - ndraws = len(trace) - else: - if keep_size: - # arguably this should be just a warning. - raise IncorrectArgumentsError( - "For keep_size, cannot identify chains and length from %s.", trace - ) - - model = modelcontext(model) - assert model is not None - - if model.potentials: - warnings.warn( - "The effect of Potentials on other parameters is ignored during posterior predictive sampling. " - "This is likely to lead to invalid or biased predictive samples.", - UserWarning, - ) - - with model: - - if keep_size and samples is not None: - raise IncorrectArgumentsError("Should not specify both keep_size and samples arguments") - - if isinstance(trace, list) and all(isinstance(x, dict) for x in trace): - _trace = _TraceDict(point_list=trace) - elif isinstance(trace, MultiTrace): - _trace = _TraceDict(multi_trace=trace) - else: - raise TypeError( - "Unable to generate posterior predictive samples from argument of type %s" - % type(trace) - ) - - len_trace = len(_trace) - - assert isinstance(_trace, _TraceDict) - - _samples: list[int] = [] - # temporary replacement for more complicated logic. - max_samples: int = len_trace - if samples is None or samples == max_samples: - _samples = [max_samples] - elif samples < max_samples: - warnings.warn( - "samples parameter is smaller than nchains times ndraws, some draws " - "and/or chains may not be represented in the returned posterior " - "predictive sample" - ) - # if this is less than the number of samples in the trace, take a slice and - # work with that. - _trace = _trace[slice(samples)] - _samples = [samples] - elif samples > max_samples: - full, rem = divmod(samples, max_samples) - _samples = (full * [max_samples]) + ([rem] if rem != 0 else []) - else: - raise IncorrectArgumentsError( - "Unexpected combination of samples (%s) and max_samples (%d)" - % (samples, max_samples) - ) - - if var_names is None: - vars = model.observed_RVs - else: - vars = [model[x] for x in var_names] - - if random_seed is not None: - np.random.seed(random_seed) - - if TYPE_CHECKING: - _ETPParent = UserDict[str, np.ndarray] # this is only processed by mypy - else: - # this is not seen by mypy but will be executed at runtime. 
- _ETPParent = UserDict - - class _ExtendableTrace(_ETPParent): - def extend_trace(self, trace: dict[str, np.ndarray]) -> None: - for k, v in trace.items(): - if k in self.data: - self.data[k] = np.concatenate((self.data[k], v)) - else: - self.data[k] = v - - ppc_trace = _ExtendableTrace() - for s in _samples: - strace = _trace if s == len_trace else _trace[slice(0, s)] - try: - values = posterior_predictive_draw_values(cast(List[Any], vars), strace, s) - new_trace: dict[str, np.ndarray] = {k.name: v for (k, v) in zip(vars, values)} - ppc_trace.extend_trace(new_trace) - except KeyboardInterrupt: - pass - - if keep_size: - return {k: ary.reshape((nchains, ndraws, *ary.shape[1:])) for k, ary in ppc_trace.items()} - # this gets us a Dict[str, np.ndarray] instead of my wrapped equiv. - return ppc_trace.data - - -def posterior_predictive_draw_values( - vars: list[Any], trace: _TraceDict, samples: int -) -> list[np.ndarray]: - with _PosteriorPredictiveSampler(vars, trace, samples, None) as sampler: - return sampler.draw_values() - - -class _PosteriorPredictiveSampler(AbstractContextManager): - """The process of posterior predictive sampling is quite complicated so this provides a central data store.""" - - # inputs - vars: list[Any] - trace: _TraceDict - samples: int - size: int | None # not supported! - - # other slots - logger: logging.Logger - - # for the search - evaluated: dict[int, np.ndarray] - symbolic_params: list[tuple[int, Any]] - - # set by make_graph... - leaf_nodes: dict[str, Any] - named_nodes_parents: dict[str, Any] - named_nodes_children: dict[str, Any] - _tok: contextvars.Token - - def __init__(self, vars, trace: _TraceDict, samples, model: Model | None, size=None): - if size is not None: - raise NotImplementedError( - "sample_posterior_predictive does not support the size argument at this time." - ) - assert vars is not None - self.vars = vars - self.trace = trace - self.samples = samples - self.size = size - self.logger = logging.getLogger("posterior_predictive") - - def __enter__(self) -> "_PosteriorPredictiveSampler": - self._tok = vectorized_ppc.set(posterior_predictive_draw_values) - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]: - vectorized_ppc.reset(self._tok) - return False - - def draw_values(self) -> list[np.ndarray]: - vars = self.vars - trace = self.trace - samples = self.samples - # size = self.size - params = dict(enumerate(vars)) - - with _DrawValuesContext() as context: - self.init() - self.make_graph() - - drawn = context.drawn_vars - - # Init givens and the stack of nodes to try to `_draw_value` from - givens = { - p.name: (p, v) - for (p, samples), v in drawn.items() - if getattr(p, "name", None) is not None - } - stack = list(self.leaf_nodes.values()) # A queue would be more appropriate - - while stack: - next_ = stack.pop(0) - if (next_, samples) in drawn: - # If the node already has a givens value, skip it - continue - elif isinstance(next_, (Constant, SharedVariable)): - # If the node is a aesara.tensor.TensorConstant or a - # aesara.tensor.sharedvar.SharedVariable, its value will be - # available automatically in _compile_aesara_function so - # we can skip it. Furthermore, if this node was treated as a - # TensorVariable that should be compiled by aesara in - # _compile_aesara_function, it would raise a `TypeError: - # ('Constants not allowed in param list', ...)` for - # TensorConstant, and a `TypeError: Cannot use a shared - # variable (...) as explicit input` for SharedVariable. 
- # ObservedRV and MultiObservedRV instances are ViewOPs - # of TensorConstants or SharedVariables, we must add them - # to the stack or risk evaluating deterministics with the - # wrong values (issue #3354) - stack.extend( - [ - node - for node in self.named_nodes_parents[next_] - if isinstance(node, (ObservedRV, MultiObservedRV)) - and (node, samples) not in drawn - ] - ) - continue - else: - # If the node does not have a givens value, try to draw it. - # The named node's children givens values must also be taken - # into account. - children = self.named_nodes_children[next_] - temp_givens = [givens[k] for k in givens if k in children] - try: - # This may fail for autotransformed RVs, which don't - # have the random method - value = self.draw_value(next_, trace=trace, givens=temp_givens) - assert isinstance(value, np.ndarray) - givens[next_.name] = (next_, value) - drawn[(next_, samples)] = value - except aesara.graph.fg.MissingInputError: - # The node failed, so we must add the node's parents to - # the stack of nodes to try to draw from. We exclude the - # nodes in the `params` list. - stack.extend( - [ - node - for node in self.named_nodes_parents[next_] - if node is not None and (node, samples) not in drawn - ] - ) - - # the below makes sure the graph is evaluated in order - # test_distributions_random::TestDrawValues::test_draw_order fails without it - # The remaining params that must be drawn are all hashable - to_eval: set[int] = set() - missing_inputs: set[int] = {j for j, p in self.symbolic_params} - - while to_eval or missing_inputs: - if to_eval == missing_inputs: - raise ValueError( - "Cannot resolve inputs for {}".format( - [get_var_name(trace.varnames[j]) for j in to_eval] - ) - ) - to_eval = set(missing_inputs) - missing_inputs = set() - for param_idx in to_eval: - param = vars[param_idx] - drawn = context.drawn_vars - if (param, samples) in drawn: - self.evaluated[param_idx] = drawn[(param, samples)] - else: - try: - if param in self.named_nodes_children: - for node in self.named_nodes_children[param]: - if node.name not in givens and (node, samples) in drawn: - givens[node.name] = ( - node, - drawn[(node, samples)], - ) - value = self.draw_value(param, trace=self.trace, givens=givens.values()) - assert isinstance(value, np.ndarray) - self.evaluated[param_idx] = drawn[(param, samples)] = value - givens[param.name] = (param, value) - except aesara.graph.fg.MissingInputError: - missing_inputs.add(param_idx) - return [self.evaluated[j] for j in params] - - def init(self) -> None: - """This method carries out the initialization phase of sampling - from the posterior predictive distribution. 
Notably it initializes the - ``_DrawValuesContext`` bookkeeping object and evaluates the "fast drawable" - parts of the model.""" - vars: list[Any] = self.vars - trace: _TraceDict = self.trace - samples: int = self.samples - leaf_nodes: dict[str, Any] - named_nodes_parents: dict[str, Any] - named_nodes_children: dict[str, Any] - - # initialization phase - context = _DrawValuesContext.get_context() - assert isinstance(context, _DrawValuesContext) - with context: - drawn = context.drawn_vars - evaluated: dict[int, Any] = {} - symbolic_params = [] - for i, var in enumerate(vars): - if is_fast_drawable(var): - evaluated[i] = self.draw_value(var) - continue - name = getattr(var, "name", None) - if (var, samples) in drawn: - evaluated[i] = drawn[(var, samples)] - # We filter out Deterministics by checking for `model` attribute - elif name is not None and hasattr(var, "model") and name in trace.varnames: - # param.name is in the trace. Record it as drawn and evaluated - drawn[(var, samples)] = evaluated[i] = trace[cast(str, name)] - else: - # param still needs to be drawn - symbolic_params.append((i, var)) - self.evaluated = evaluated - self.symbolic_params = symbolic_params - - def make_graph(self) -> None: - # Distribution parameters may be nodes which have named node-inputs - # specified in the point. Need to find the node-inputs, their - # parents and children to replace them. - symbolic_params = self.symbolic_params - self.leaf_nodes = {} - self.named_nodes_parents = {} - self.named_nodes_children = {} - for _, param in symbolic_params: - if hasattr(param, "name"): - # Get the named nodes under the `param` node - nn, nnp, nnc = get_named_nodes_and_relations(param) - self.leaf_nodes.update(nn) - # Update the discovered parental relationships - for k in nnp.keys(): - if k not in self.named_nodes_parents.keys(): - self.named_nodes_parents[k] = nnp[k] - else: - self.named_nodes_parents[k].update(nnp[k]) - # Update the discovered child relationships - for k in nnc.keys(): - if k not in self.named_nodes_children.keys(): - self.named_nodes_children[k] = nnc[k] - else: - self.named_nodes_children[k].update(nnc[k]) - - def draw_value(self, param, trace: _TraceDict | None = None, givens=None): - """Draw a set of random values from a distribution or return a constant. - - Parameters - ---------- - param: number, array like, aesara variable or pymc3 random variable - The value or distribution. Constants or shared variables - will be converted to an array and returned. Aesara variables - are evaluated. If `param` is a pymc3 random variable, draw - values from it and return that (as ``np.ndarray``), unless a - value is specified in the ``trace``. - trace: pm.MultiTrace, optional - A dictionary from pymc3 variable names to samples of their values - used to provide context for evaluating ``param``. - givens: dict, optional - A dictionary from aesara variables to their values. These values - are used to evaluate ``param`` if it is a aesara variable. - """ - samples = self.samples - - def random_sample( - meth: Callable[..., np.ndarray], - param, - point: _TraceDict, - size: int, - shape: tuple[int, ...], - ) -> np.ndarray: - val = meth(point=point, size=size) - try: - assert val.shape == (size,) + shape, ( - "Sampling from random of %s yields wrong shape" % param - ) - # error-quashing here is *extremely* ugly, but it seems to be what the logic in DensityDist wants. 
- except AssertionError as e: - if ( - hasattr(param, "distribution") - and hasattr(param.distribution, "wrap_random_with_dist_shape") - and not param.distribution.wrap_random_with_dist_shape - ): - pass - else: - raise e - - return val - - if isinstance(param, (numbers.Number, np.ndarray)): - return param - elif isinstance(param, Constant): - return param.value - elif isinstance(param, SharedVariable): - return param.get_value() - elif isinstance(param, (TensorVariable, MultiObservedRV)): - if hasattr(param, "model") and trace and param.name in trace.varnames: - return trace[param.name] - elif hasattr(param, "random") and param.random is not None: - model = modelcontext(None) - assert isinstance(model, Model) - shape: tuple[int, ...] = tuple(_param_shape(param, model)) - return random_sample(param.random, param, point=trace, size=samples, shape=shape) - elif ( - hasattr(param, "distribution") - and hasattr(param.distribution, "random") - and param.distribution.random is not None - ): - if hasattr(param, "observations"): - # shape inspection for ObservedRV - dist_tmp = param.distribution - try: - distshape: tuple[int, ...] = tuple(param.observations.shape.eval()) - except AttributeError: - distshape = tuple(param.observations.shape) - - dist_tmp.shape = distshape - try: - return random_sample( - dist_tmp.random, - param, - point=trace, - size=samples, - shape=distshape, - ) - except (ValueError, TypeError): - # reset shape to account for shape changes - # with aesara.shared inputs - dist_tmp.shape = () - # We want to draw values to infer the dist_shape, - # we don't want to store these drawn values to the context - with _DrawValuesContextBlocker(): - point = trace[0] if trace else None - temp_val = np.atleast_1d(dist_tmp.random(point=point, size=None)) - # if hasattr(param, 'name') and param.name == 'obs': - # import pdb; pdb.set_trace() - # Sometimes point may change the size of val but not the - # distribution's shape - if point and samples is not None: - temp_size = np.atleast_1d(samples) - if all(temp_val.shape[: len(temp_size)] == temp_size): - dist_tmp.shape = tuple(temp_val.shape[len(temp_size) :]) - else: - dist_tmp.shape = tuple(temp_val.shape) - # I am not sure why I need to do this, but I do in order to trim off a - # degenerate dimension [2019/09/05:rpg] - if dist_tmp.shape[0] == 1 and len(dist_tmp.shape) > 1: - dist_tmp.shape = dist_tmp.shape[1:] - return random_sample( - dist_tmp.random, - point=trace, - size=samples, - param=param, - shape=tuple(dist_tmp.shape), - ) - else: # has a distribution, but no observations - distshape = tuple(param.distribution.shape) - return random_sample( - meth=param.distribution.random, - param=param, - point=trace, - size=samples, - shape=distshape, - ) - # NOTE: I think the following is already vectorized. 
- else: - if givens: - variables, values = list(zip(*givens)) - else: - variables = values = [] - # We only truly care if the ancestors of param that were given - # value have the matching dshape and val.shape - param_ancestors = set( - aesara.graph.basic.ancestors([param], blockers=list(variables)) - ) - inputs = [ - (var, val) for var, val in zip(variables, values) if var in param_ancestors - ] - if inputs: - input_vars, input_vals = list(zip(*inputs)) - else: - input_vars = [] - input_vals = [] - func = _compile_aesara_function(param, input_vars) - if not input_vars: - assert input_vals == [] # AFAICT if there are now vars, there can't be vals - output = func(*input_vals) - if hasattr(output, "shape"): - val = np.repeat(np.expand_dims(output, 0), samples, axis=0) - else: - val = np.full(samples, output) - - else: - val = func(*input_vals) - # np.ndarray([func(*input_vals) for inp in zip(*input_vals)]) - return val - raise ValueError("Unexpected type in draw_value: %s" % type(param)) - - -def _param_shape(var_desig, model: Model) -> tuple[int, ...]: - if isinstance(var_desig, str): - v = model[var_desig] - else: - v = var_desig - if hasattr(v, "observations"): - try: - # To get shape of _observed_ data container `pm.Data` - # (wrapper for SharedVariable) we evaluate it. - shape = tuple(v.observations.shape.eval()) - except AttributeError: - shape = v.observations.shape - elif hasattr(v, "dshape"): - shape = v.dshape - else: - shape = v.tag.test_value.shape - if shape == (1,): - shape = tuple() - return shape diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index e61e38d3d3..c0fe7e82f3 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -151,7 +151,7 @@ def test_shared_scalar_as_rv_input(self): v = pm.Normal("v", mu=shared_var, size=1) np.testing.assert_allclose( - logpt(v, 5.0).eval(), + logpt(v, np.r_[5.0]).eval(), -0.91893853, rtol=1e-5, ) @@ -159,7 +159,7 @@ def test_shared_scalar_as_rv_input(self): shared_var.set_value(10.0) np.testing.assert_allclose( - logpt(v, 10.0).eval(), + logpt(v, np.r_[10.0]).eval(), -0.91893853, rtol=1e-5, ) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 0dd4e19280..95f379836a 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -29,7 +29,7 @@ import pymc3 as pm from pymc3.distributions.dist_math import clipped_beta_rvs -from pymc3.distributions.distribution import to_tuple +from pymc3.distributions.shape_utils import to_tuple from pymc3.exceptions import ShapeError from pymc3.tests.helpers import SeededTest from pymc3.tests.test_distributions import ( diff --git a/pymc3/tests/test_modelcontext.py b/pymc3/tests/test_modelcontext.py index b7d44ca63c..ba14f90921 100644 --- a/pymc3/tests/test_modelcontext.py +++ b/pymc3/tests/test_modelcontext.py @@ -17,10 +17,6 @@ from pytest import raises from pymc3 import Model, Normal -from pymc3.distributions.distribution import ( - _DrawValuesContext, - _DrawValuesContextBlocker, -) from pymc3.model import modelcontext @@ -78,24 +74,6 @@ def test_mixed_contexts(): with modelB: assert Model.get_context() == modelB assert modelcontext(None) == modelB - dvc = _DrawValuesContext() - with dvc: - assert Model.get_context() == modelB - assert modelcontext(None) == modelB - assert _DrawValuesContext.get_context() == dvc - dvcb = _DrawValuesContextBlocker() - with dvcb: - assert _DrawValuesContext.get_context() == dvcb - assert 
_DrawValuesContextBlocker.get_context() == dvcb - assert _DrawValuesContext.get_context() == dvc - assert _DrawValuesContextBlocker.get_context() is dvc - assert Model.get_context() == modelB - assert modelcontext(None) == modelB - assert _DrawValuesContext.get_context(error_if_none=False) is None - with raises(TypeError): - _DrawValuesContext.get_context() - assert Model.get_context() == modelB - assert modelcontext(None) == modelB assert Model.get_context() == modelA assert modelcontext(None) == modelA assert Model.get_context(error_if_none=False) is None diff --git a/pymc3/tests/test_posterior_predictive.py b/pymc3/tests/test_posterior_predictive.py deleted file mode 100644 index 7a19ac4a59..0000000000 --- a/pymc3/tests/test_posterior_predictive.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np - -import pymc3 as pm - -from pymc3.backends.ndarray import point_list_to_multitrace -from pymc3.distributions.posterior_predictive import _TraceDict - - -def test_translate_point_list(): - with pm.Model() as model: - mu = pm.Normal("mu", 0.0, 1.0) - a = pm.Normal("a", mu=mu, sigma=1, observed=0.0) - mt = point_list_to_multitrace([model.test_point], model) - assert isinstance(mt, pm.backends.base.MultiTrace) - assert {"mu"} == set(mt.varnames) - assert len(mt) == 1 - - -def test_build_TraceDict(): - with pm.Model() as model: - mu = pm.Normal("mu", 0.0, 1.0) - a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) - trace = pm.sample(chains=2, draws=500) - dict = _TraceDict(multi_trace=trace) - assert isinstance(dict, _TraceDict) - assert len(dict) == 1000 - np.testing.assert_array_equal(trace["mu"], dict["mu"]) - assert set(trace.varnames) == set(dict.varnames) == {"mu"} - - -def test_build_TraceDict_point_list(): - with pm.Model() as model: - mu = pm.Normal("mu", 0.0, 1.0) - a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) - dict = _TraceDict(point_list=[model.test_point]) - assert set(dict.varnames) == {"mu"} - assert len(dict) == 1 - assert len(dict["mu"]) == 1 - assert dict["mu"][0] == 0.0 diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py deleted file mode 100644 index f88e6f75f9..0000000000 --- a/pymc3/tests/test_random.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright 2020 The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import aesara -import aesara.tensor as aet -import numpy as np -import numpy.testing as npt -import pytest - -from numpy import random as nr - -import pymc3 as pm - -from pymc3.distributions.distribution import _draw_value, draw_values -from pymc3.tests.helpers import SeededTest - - -def test_draw_value(): - npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6]) - npt.assert_equal(_draw_value(np.array(5.0)), 5) - - npt.assert_equal(_draw_value(aet.constant([5.0, 6.0])), [5, 6]) - assert _draw_value(aet.constant(5)) == 5 - npt.assert_equal(_draw_value(2 * aet.constant([5.0, 6.0])), [10, 12]) - - val = aesara.shared(np.array([5.0, 6.0])) - npt.assert_equal(_draw_value(val), [5, 6]) - npt.assert_equal(_draw_value(2 * val), [10, 12]) - - a = aet.scalar("a") - a.tag.test_value = 6 - npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2) - - assert _draw_value(5) == 5 - assert _draw_value(5.0) == 5 - assert isinstance(_draw_value(5.0), type(5.0)) - assert isinstance(_draw_value(5), type(5)) - - with pm.Model(): - mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) - a = pm.Normal("a", mu=mu, sigma=5, shape=2) - - val1 = _draw_value(a) - val2 = _draw_value(a) - assert np.all(val1 != val2) - - with pytest.raises(ValueError) as err: - _draw_value([]) - err.match("Unexpected type") - - -class TestDrawValues: - def test_empty(self): - assert draw_values([]) == [] - - def test_vals(self): - npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6]) - npt.assert_equal(draw_values([np.array(5.0)])[0], 5) - - npt.assert_equal(draw_values([aet.constant([5.0, 6.0])])[0], [5, 6]) - assert draw_values([aet.constant(5)])[0] == 5 - npt.assert_equal(draw_values([2 * aet.constant([5.0, 6.0])])[0], [10, 12]) - - val = aesara.shared(np.array([5.0, 6.0])) - npt.assert_equal(draw_values([val])[0], [5, 6]) - npt.assert_equal(draw_values([2 * val])[0], [10, 12]) - - def test_simple_model(self): - with pm.Model(): - mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) - a = pm.Normal("a", mu=mu, sigma=5, shape=2) - - val1 = draw_values([a]) - val2 = draw_values([a]) - assert np.all(val1[0] != val2[0]) - - point = {"a": np.array([3.0, 4.0])} - npt.assert_equal(draw_values([a], point=point), [point["a"]]) - - def test_dep_vars(self): - with pm.Model(): - mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) - sd = pm.HalfNormal("sd", shape=2) - tau = 1 / sd ** 2 - a = pm.Normal("a", mu=mu, tau=tau, shape=2) - - point = {"a": np.array([1.0, 2.0])} - npt.assert_equal(draw_values([a], point=point), [point["a"]]) - - val1 = draw_values([a])[0] - val2 = draw_values([a], point={"sd": np.array([2.0, 3.0])})[0] - val3 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] - val4 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] - - assert all( - [ - np.all(val1 != val2), - np.all(val1 != val3), - np.all(val1 != val4), - np.all(val2 != val3), - np.all(val2 != val4), - np.all(val3 != val4), - ] - ) - - def test_graph_constant(self): - # Issue 3595 pointed out that slice(None) can introduce - # aesara.graph.basic.Constant into the compute graph, which wasn't - # handled correctly by draw_values - n_d = 500 - n_x = 2 - n_y = 1 - n_g = 10 - g = np.random.randint(0, n_g, (n_d,)) # group - x = np.random.randint(0, n_x, (n_d,)) # x factor - with pm.Model(): - multi_dim_rv = pm.Normal("multi_dim_rv", mu=0, sd=1, shape=(n_x, n_g, n_y)) - indexed_rv = multi_dim_rv[x, g, :] - i = draw_values([indexed_rv]) - assert i is not None - - -class 
TestJointDistributionDrawValues(SeededTest): - def test_joint_distribution(self): - with pm.Model() as model: - a = pm.Normal("a", mu=0, sigma=100) - b = pm.Normal("b", mu=a, sigma=1e-8) - c = pm.Normal("c", mu=a, sigma=1e-8) - d = pm.Deterministic("d", b + c) - - # Expected RVs - N = 1000 - norm = np.random.randn(3, N) - eA = norm[0] * 100 - eB = eA + norm[1] * 1e-8 - eC = eA + norm[2] * 1e-8 - eD = eB + eC - - # Drawn RVs - nr.seed(self.random_seed) - # A, B, C, D = list(zip(*[draw_values([a, b, c, d]) for i in range(N)])) - A, B, C, D = draw_values([a, b, c, d], size=N) - A = np.array(A).flatten() - B = np.array(B).flatten() - C = np.array(C).flatten() - D = np.array(D).flatten() - - # Assert that the drawn samples match the expected values - assert np.allclose(eA, A) - assert np.allclose(eB, B) - assert np.allclose(eC, C) - assert np.allclose(eD, D) - - # Assert that A, B and C have the expected difference - assert np.all(np.abs(A - B) < 1e-6) - assert np.all(np.abs(A - C) < 1e-6) - assert np.all(np.abs(B - C) < 1e-6) - - # Marginal draws - mA = np.array([draw_values([a]) for i in range(N)]).flatten() - mB = np.array([draw_values([b]) for i in range(N)]).flatten() - mC = np.array([draw_values([c]) for i in range(N)]).flatten() - # Also test the with model context of draw_values - with model: - mD = np.array([draw_values([d]) for i in range(N)]).flatten() - - # Assert that the marginal distributions have different sample values - assert not np.all(np.abs(B - mB) < 1e-2) - assert not np.all(np.abs(C - mC) < 1e-2) - assert not np.all(np.abs(D - mD) < 1e-2) - - # Assert that the marginal distributions do not have high cross - # correlation - assert np.abs(np.corrcoef(mA, mB)[0, 1]) < 0.1 - assert np.abs(np.corrcoef(mA, mC)[0, 1]) < 0.1 - assert np.abs(np.corrcoef(mB, mC)[0, 1]) < 0.1 From 08393fb8dab9dc04fecd163a584161d6e2c97d02 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:51:05 -0500 Subject: [PATCH 05/16] Change logp_transform argument to the entire random variable --- pymc3/distributions/continuous.py | 8 ++++---- pymc3/model.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 65c7fee2c0..d21d390c51 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -110,18 +110,18 @@ class BoundedContinuous(Continuous): @logp_transform.register(PositiveContinuous) -def pos_cont_transform(op, inputs): +def pos_cont_transform(op, rv_var): return transforms.log @logp_transform.register(UnitContinuous) -def unit_cont_transform(op, inputs): +def unit_cont_transform(op, rv_var): return transforms.logodds @logp_transform.register(BoundedContinuous) -def bounded_cont_transform(op, inputs): - _, _, _, lower, upper = inputs +def bounded_cont_transform(op, rv_var): + _, _, _, lower, upper = rv_var.owner.inputs lower = aet.as_tensor_variable(lower) if lower is not None else None upper = aet.as_tensor_variable(upper) if upper is not None else None diff --git a/pymc3/model.py b/pymc3/model.py index 9599f3b288..54172ce854 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1087,7 +1087,8 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans self.free_RVs.append(rv_var) value_var = rv_var.clone() - transform = transform or logp_transform(rv_var.owner.op, rv_var.owner.inputs) + transform = transform or logp_transform(rv_var.owner.op, rv_var) + if transform is not None: value_var.tag.transform = transform value_var.name = f"{rv_var.name}_{transform.name}" From fdfb3006ee05f94ad8fc093683691bdb302b745e Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 01:54:31 -0500 Subject: [PATCH 06/16] Remove logpt transformed option --- pymc3/distributions/__init__.py | 25 +++++++++---------------- pymc3/model.py | 16 ++++------------ 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index bee51ca910..1d55aa57d9 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -234,7 +234,6 @@ def logpt( rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, jacobian: Optional[bool] = True, - transformed: Optional[bool] = True, scaling: Optional[bool] = True, **kwargs, ) -> TensorVariable: @@ -256,8 +255,6 @@ def logpt( when available, used. jacobian Whether or not to include the Jacobian term. - transformed - Return the transformed version of the log-likelihood graph. scaling A scaling term to apply to the generated log-likelihood graph. 
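A minimal sketch of the call pattern this change leaves behind (illustrative only, not part of the patch): with the `transformed` keyword gone, whether `logpt` builds the log-likelihood on the transformed scale is decided solely by the `transform` tagged onto the value variable, and callers supply values on that scale. The example assumes a bounded variable whose value variable carries an interval transform, per the `BoundedContinuous` rule registered earlier in the series; the model and names are made up for illustration.

import numpy as np
import pymc3 as pm

from pymc3.distributions import logpt

with pm.Model() as m:
    x = pm.Uniform("x", lower=0.0, upper=1.0)

# The measure-space ("value") variable and the transform tagged onto it
x_value = x.tag.value_var
transform = getattr(x_value.tag, "transform", None)  # interval transform here

# Values are supplied on the transformed scale; the Jacobian term is
# included by default and can be dropped with `jacobian=False`.
value = transform.forward(np.r_[0.5]).eval()
print(logpt(x, value).eval())
print(logpt(x, value, jacobian=False).eval())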
@@ -310,27 +307,28 @@ def logpt(

     dist_params, replacements = sample_to_measure_vars(dist_params)

-    logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs)
+    transform = getattr(rv_value_var.tag, "transform", None)

     # If any of the measure vars are transformed measure-space variables
     # (signified by having a `transform` value in their tags), then we apply
     # their transforms and add their Jacobians (when enabled)
-    if transformed:
+    if transform:
+        logp_var = _logp(rv_node.op, transform.backward(rv_value_var), *dist_params, **kwargs)
         logp_var = transform_logp(
             logp_var,
-            tuple(replacements.values()) + (rv_value_var,),
+            tuple(replacements.values()),
         )

-        transform = getattr(rv_value_var.tag, "transform", None)
-
-        if transform and jacobian:
+        if jacobian:
             transformed_jacobian = transform.jacobian_det(rv_value_var)
             if transformed_jacobian:
                 if logp_var.ndim > transformed_jacobian.ndim:
                     logp_var = logp_var.sum(axis=-1)
                 logp_var += transformed_jacobian
+    else:
+        logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs)

-    (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value})
+    (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value})

     if scaling:
         logp_var *= _get_scaling(
@@ -374,7 +372,7 @@ def _logp(op, value, *dist_params, **kwargs):
     return aet.zeros_like(value)


-def logcdf(rv_var, rv_value, transformed=True, jacobian=True, **kwargs):
+def logcdf(rv_var, rv_value, jacobian=True, **kwargs):
    """Create a log-CDF graph."""

     rv_var, rv_value = rv_log_likelihood_args(rv_var)
@@ -389,11 +387,6 @@ def logcdf(rv_var, rv_value, transformed=True, jacobian=True, **kwargs):

     logp_var = _logcdf(rv_node.op, rv_value, *dist_params, **kwargs)

-    if transformed:
-        logp_var = transform_logp(
-            logp_var, tuple(replacements.values()) + (rv_value,), jacobian=jacobian
-        )
-
     return logp_var


diff --git a/pymc3/model.py b/pymc3/model.py
index 54172ce854..f9a2abf52e 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -893,7 +893,7 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs):
         with self:
             free_RVs_logp = aet.sum(
                 [
-                    aet.sum(logpt(var, getattr(var.tag, "value_var", None), transformed=True))
+                    aet.sum(logpt(var, getattr(var.tag, "value_var", None)))
                     for var in self.free_RVs + self.potentials
                 ]
             )
@@ -911,10 +911,7 @@ def logpt(self):
         """Aesara scalar of log-probability of the model"""
         with self:
-            factors = [
-                logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True)
-                for var in self.free_RVs
-            ]
+            factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs]
             factors += [logpt_sum(obs) for obs in self.observed_RVs]
             factors += self.potentials
             logp_var = aet.sum([aet.sum(factor) for factor in factors])
@@ -934,9 +931,7 @@ def logp_nojact(self):
         """
         with self:
             factors = [
-                logpt_sum(
-                    var, getattr(var.tag, "value_var", None), jacobian=False, transformed=True
-                )
+                logpt_sum(var, getattr(var.tag, "value_var", None), jacobian=False)
                 for var in self.free_RVs
             ]
             factors += [logpt_sum(obs, jacobian=False) for obs in self.observed_RVs]
@@ -953,10 +948,7 @@ def varlogpt(self):
         """Aesara scalar of log-probability of the unobserved random variables
         (excluding deterministic)."""
         with self:
-            factors = [
-                logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True)
-                for var in self.free_RVs
-            ]
+            factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs]
             return aet.sum(factors)

     @property
From 
b0c51ed38c683ac040b576bc65870232e2581de4 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 01:55:52 -0500 Subject: [PATCH 07/16] Implement transform for Dirichlet --- pymc3/distributions/multivariate.py | 14 ++++++++++++-- pymc3/tests/test_distributions.py | 22 +++++++++++++++------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 650d1e1ad2..dfbff54cb4 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -40,7 +40,7 @@ import pymc3 as pm from pymc3.aesaraf import floatX, intX -from pymc3.distributions import _logp, transforms +from pymc3.distributions import _logp, logp_transform, transforms from pymc3.distributions.continuous import ChiSquared, Normal from pymc3.distributions.dist_math import bound, factln, logpow from pymc3.distributions.distribution import Continuous, Discrete @@ -459,7 +459,6 @@ class Dirichlet(Continuous): """ rv_op = dirichlet - default_transform = transforms.stick_breaking @classmethod def dist(cls, a, **kwargs): @@ -474,6 +473,17 @@ def _distr_parameters_for_repr(self): return ["a"] +@logp_transform.register(DirichletRV) +def dirichlet_transform(op, rv_var): + + if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + # If this variable is just a bunch of scalars/degenerate + # Dirichlets, we can't transform it + return None + + return transforms.stick_breaking + + @_logp.register(DirichletRV) def dirichlet_logp(op, value, a): """ diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index aa5b755ed5..bb47bba6de 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1825,18 +1825,26 @@ def test_dirichlet(self, n): def test_dirichlet_with_batch_shapes(self, dist_shape): a = np.ones(dist_shape) with pm.Model() as model: - d = pm.Dirichlet("a", a=a) + d = pm.Dirichlet("d", a=a) - pymc3_res = logpt(d, d.tag.test_value).eval() + d_value = d.tag.value_var + d_point = d.eval() + if hasattr(d_value.tag, "transform"): + d_point_trans = d_value.tag.transform.forward(d_point).eval() + else: + d_point_trans = d_point + + pymc3_res = logpt(d, d_point_trans, jacobian=False).eval() + scipy_res = np.empty_like(pymc3_res) for idx in np.ndindex(a.shape[:-1]): - scipy_res = scipy.stats.dirichlet(a[idx]).logpdf(d.tag.test_value[idx]) - assert_almost_equal(pymc3_res[idx], scipy_res) + scipy_res[idx] = scipy.stats.dirichlet(a[idx]).logpdf(d_point[idx]) + + assert_almost_equal(pymc3_res, scipy_res) def test_dirichlet_shape(self): a = aet.as_tensor_variable(np.r_[1, 2]) - with pytest.warns(DeprecationWarning): - dir_rv = Dirichlet.dist(a) - assert dir_rv.shape == (2,) + dir_rv = Dirichlet.dist(a) + assert dir_rv.shape.eval() == (2,) with pytest.warns(DeprecationWarning), aesara.change_flags(compute_test_value="ignore"): dir_rv = Dirichlet.dist(aet.vector()) From d802a0e0de277333dfb61bbc82e79e6c7bddb52e Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:56:43 -0500 Subject: [PATCH 08/16] Always use the value var to initially build the log-likelihood --- pymc3/distributions/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 1d55aa57d9..bdb63e0df2 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -267,6 +267,9 @@ def logpt( else: rv_value = aet.as_tensor(rv_value) + if rv_value_var is None: + rv_value_var = rv_value + rv_node = rv_var.owner if not rv_node: @@ -298,9 +301,6 @@ def logpt( return aet.zeros_like(rv_var) - if rv_value_var is None: - raise NotImplementedError(f"The log-likelihood for {rv_var} is undefined") - # This case should be reached when `rv_var` is either the result of an # `Observed` or a `RandomVariable` `Op` rng, size, dtype, *dist_params = rv_node.inputs @@ -375,12 +375,14 @@ def _logp(op, value, *dist_params, **kwargs): def logcdf(rv_var, rv_value, jacobian=True, **kwargs): """Create a log-CDF graph.""" - rv_var, rv_value = rv_log_likelihood_args(rv_var) + rv_var, _ = rv_log_likelihood_args(rv_var) rv_node = rv_var.owner if not rv_node: raise TypeError() + rv_value = aet.as_tensor(rv_value) + rng, size, dtype, *dist_params = rv_node.inputs dist_params, replacements = sample_to_measure_vars(dist_params) From df7df003593d60759d5578143ac85ecd1c61e87f Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 01:57:27 -0500 Subject: [PATCH 09/16] Register GammaRV to PositiveContinuous class --- pymc3/distributions/continuous.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index d21d390c51..2502877bab 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2536,6 +2536,9 @@ def _distr_parameters_for_repr(self): return ["alpha", "beta"] +PositiveContinuous.register(GammaRV) + + @_logp.register(GammaRV) def gamma_logp(op, value, alpha, beta): """ From 06284f2b5f3a479068b3d0fc18f581147ea6bb78 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:58:07 -0500 Subject: [PATCH 10/16] Add an option for negative support assertions in Normal and Gamma classes --- pymc3/distributions/continuous.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 2502877bab..72e2317ad5 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -466,7 +466,7 @@ class Normal(Continuous): rv_op = normal @classmethod - def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs): + def dist(cls, mu=0, sigma=None, tau=None, sd=None, no_assert=False, **kwargs): if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) @@ -477,7 +477,9 @@ def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs): # mean = median = mode = mu = aet.as_tensor_variable(floatX(mu)) # variance = 1.0 / self.tau - assert_negative_support(sigma, "sigma", "Normal") + if not no_assert: + assert_negative_support(sigma, "sigma", "Normal") + return super().dist([mu, sigma], **kwargs) @@ -2500,7 +2502,7 @@ class Gamma(PositiveContinuous): rv_op = gamma @classmethod - def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs): + def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, no_assert=False, **kwargs): if sd is not None: sigma = sd @@ -2511,10 +2513,11 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwar # mode = aet.maximum((alpha - 1) / beta, 0) # variance = alpha / beta ** 2 - assert_negative_support(alpha, "alpha", "Gamma") - assert_negative_support(beta, "beta", "Gamma") + if not no_assert: + assert_negative_support(alpha, "alpha", "Gamma") + assert_negative_support(beta, "beta", "Gamma") - return super().dist([alpha, beta], **kwargs) + return super().dist([alpha, aet.inv(beta)], **kwargs) @classmethod def get_alpha_beta(cls, alpha=None, beta=None, mu=None, sigma=None): From 3458eebc3ea2a68e170ded5d938314db5dcc547a Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:59:28 -0500 Subject: [PATCH 11/16] Make logp testing work with transformed values --- pymc3/tests/test_distributions.py | 39 ++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index bb47bba6de..ac35edeb35 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -221,13 +221,16 @@ def product(domains, n_samples=-1): def build_model(distfam, valuedomain, vardomains, extra_args=None): if extra_args is None: extra_args = {} + with Model() as m: - vals = {} + param_vars = {} for v, dom in vardomains.items(): - vals[v] = dom.vals[0] - vals.update(extra_args) - distfam("value", size=valuedomain.shape, transform=None, **vals) - return m + v_at = aesara.shared(np.asarray(dom.vals[0])) + v_at.name = v + param_vars[v] = v_at + param_vars.update(extra_args) + distfam("value", **param_vars, transform=None) + return m, param_vars def laplace_asymmetric_logpdf(value, kappa, b, mu): @@ -606,14 +609,34 @@ def logp_reference(args): args.update(scipy_args) return scipy_logp(**args) - model = build_model(pymc3_dist, domain, paramdomains, extra_args) - logp = model.fastlogp + model, param_vars = build_model(pymc3_dist, domain, paramdomains, extra_args) + logp = model.fastlogp_nojac domains = paramdomains.copy() domains["value"] = domain for pt in product(domains, n_samples=n_samples): + pt = dict(pt) - pt_logp = Point(pt, model=model) + pt_d = {} + for k, v in pt.items(): + nv = param_vars.get(k, model.named_vars.get(k)) + nv = getattr(nv.tag, "value_var", nv) + + transform = getattr(nv.tag, "transform", None) + if transform: + # TODO: The compiled graph behind this should be cached and + # reused (if it isn't already). + v = transform.forward(v).eval() + + if nv.name in param_vars: + # Update the shared parameter variables in `param_vars` + param_vars[nv.name].set_value(v) + else: + # Create an argument entry for the (potentially + # transformed) "value" variable + pt_d[nv.name] = v + + pt_logp = Point(pt_d, model=model) pt_ref = Point(pt, filter_model_vars=False, model=model) assert_almost_equal( logp(pt_logp), From 376a711bf54bc751142cc041532507748274228c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 02:00:11 -0500 Subject: [PATCH 12/16] Disable asserts during logp invalid range tests --- pymc3/tests/test_distributions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index ac35edeb35..c0a75cda5f 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -745,7 +745,12 @@ def check_logcdf( if invalid_edge is not None: test_params = valid_params.copy() # Shallow copy should be okay test_params[invalid_param] = invalid_edge - invalid_dist = pymc3_dist.dist(**test_params) + # We need to remove `Assert`s introduced by checks like + # `assert_negative_support` and disable test values; + # otherwise, we won't be able to create the + # `RandomVariable` + with aesara.config.change_flags(compute_test_value="off"): + invalid_dist = pymc3_dist.dist(no_assert=True, **test_params) with aesara.config.change_flags(mode=Mode("py")): assert_equal( logcdf(invalid_dist, valid_value).eval(), From b27f848c8d136750707e36cc46dbce655dfc0617 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 02:00:44 -0500 Subject: [PATCH 13/16] Update xfails in pymc3.tests.test_distributions --- pymc3/tests/test_distributions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index c0a75cda5f..9ef4f1aec9 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -985,6 +985,7 @@ def test_chi_squared(self): lambda value, nu: sp.chi2.logpdf(value, df=nu), ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.xfail( condition=(aesara.config.floatX == "float32"), reason="Poor CDF in SciPy. See scipy/scipy#869 for details.", @@ -1004,6 +1005,7 @@ def test_wald_scipy(self): lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha), ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize( "value,mu,lam,phi,alpha,logp", [ @@ -1023,7 +1025,6 @@ def test_wald_scipy(self): (50.0, 15.0, None, 0.666666, 10.0, -5.6481874), ], ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_wald(self, value, mu, lam, phi, alpha, logp): # Log probabilities calculated using the dIG function from the R package gamlss. # See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or @@ -1174,6 +1175,7 @@ def scipy_mu_alpha_logcdf(value, mu, alpha): n_samples=10, ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize( "mu, p, alpha, n, expected", [ @@ -1189,7 +1191,6 @@ def scipy_mu_alpha_logcdf(value, mu, alpha): (5, 0.5, None, 2, "Can't specify both mu and p."), ], ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_negative_binomial_init_fail(self, mu, p, alpha, n, expected): with Model(): with pytest.raises(ValueError, match=f"Incompatible parametrization. {expected}"): @@ -1592,6 +1593,7 @@ def test_zeroinflatedbinomial(self): n_samples=10, ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize("n", [1, 2, 3]) def test_mvnormal(self, n): self.check_logp( From 86af02346c64529fc520645b1451cce971a93498 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 02:11:58 -0500 Subject: [PATCH 14/16] Fix Categorical logp implementation --- pymc3/distributions/discrete.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 4c428a017d..c49a49294c 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1354,7 +1354,7 @@ def dist(cls, p, **kwargs): @_logp.register(CategoricalRV) -def categorical_logp(op, value, p, upper): +def categorical_logp(op, value, p): r""" Calculate log-probability of Categorical distribution at specified value. @@ -1365,8 +1365,9 @@ def categorical_logp(op, value, p, upper): values are desired the values must be provided in a numpy array or `TensorVariable` """ + k = aet.shape(p)[-1] + p_ = p p = p_ / aet.sum(p_, axis=-1, keepdims=True) - k = aet.shape(p_)[-1] value_clip = aet.clip(value, 0, k - 1) if p.ndim > 1: From cc09a09d1679c8d695c5613bb64abb90cd2bb746 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard"
Date: Mon, 15 Mar 2021 02:13:11 -0500
Subject: [PATCH 15/16] Use the value var's values directly in logpt

---
 pymc3/distributions/__init__.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index bdb63e0df2..f7bc508d7f 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -313,22 +313,20 @@ def logpt(

     dist_params, replacements = sample_to_measure_vars(dist_params)

-        logp_var = _logp(rv_node.op, transform.backward(rv_value_var), *dist_params, **kwargs)
+        logp_var = _logp(rv_node.op, transform.backward(rv_value), *dist_params, **kwargs)
         logp_var = transform_logp(
             logp_var,
             tuple(replacements.values()),
         )

         if jacobian:
-            transformed_jacobian = transform.jacobian_det(rv_value_var)
+            transformed_jacobian = transform.jacobian_det(rv_value)
             if transformed_jacobian:
                 if logp_var.ndim > transformed_jacobian.ndim:
                     logp_var = logp_var.sum(axis=-1)
                 logp_var += transformed_jacobian
     else:
-        logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs)
-
-    (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value})
+        logp_var = _logp(rv_node.op, rv_value, *dist_params, **kwargs)

     if scaling:
         logp_var *= _get_scaling(
From 8977294d9da28708cf59d1f2ac71c0f96af2661b Mon Sep 17 00:00:00 2001
From: "Brandon T. Willard"
Date: Mon, 15 Mar 2021 02:14:46 -0500
Subject: [PATCH 16/16] Do not assume values are Applys in pymc3.tests.test_distributions

---
 pymc3/tests/test_distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 9ef4f1aec9..34671cf0f6 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -2740,6 +2740,6 @@ def test_hierarchical_logpt():
     # Make sure that hierarchical random variables are replaced with their
     # log-likelihood space variables in the log-likelihood
     logpt_ancestors = list(ancestors([m.logpt]))
-    assert not any(isinstance(v.owner.op, RandomVariable) for v in logpt_ancestors if v.owner)
+    assert not any(isinstance(v, RandomVariable) for v in logpt_ancestors)
     assert x.tag.value_var in logpt_ancestors
     assert y.tag.value_var in logpt_ancestors
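Taken together, the series leaves a model's log-probability graph built purely from measure-space (value) variables. A short sketch of the resulting invariants, mirroring the assertions in the final patch — the model below is illustrative only, and the `Gamma` transform assumes the `PositiveContinuous` registration from PATCH 09:

import pymc3 as pm

from aesara.graph.basic import ancestors
from aesara.tensor.random.op import RandomVariable

with pm.Model() as m:
    mu = pm.Normal("mu", 0.0, 1.0)
    x = pm.Gamma("x", alpha=2.0, beta=1.0)

# Each free variable carries its measure-space counterpart in its tag;
# transformed variables additionally carry the transform on the value
# variable itself.
x_value = x.tag.value_var
assert getattr(x_value.tag, "transform", None) is not None  # log transform

# Only value variables appear among the ancestors of the model's
# log-likelihood graph; no `RandomVariable`s remain.
logpt_ancestors = list(ancestors([m.logpt]))
assert not any(isinstance(v, RandomVariable) for v in logpt_ancestors)
assert mu.tag.value_var in logpt_ancestors
assert x_value in logpt_ancestors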