From c07c7097b85c0a8e13e399821f44c1b257ce899c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 10 Mar 2021 23:27:54 -0600 Subject: [PATCH 01/16] Add fallback for missing value_var in BaseTrace --- pymc3/backends/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 1b9e589bc5..114b83246a 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -61,7 +61,7 @@ def __init__(self, name, model=None, vars=None, test_point=None): model = modelcontext(model) self.model = model if vars is None: - vars = [v.tag.value_var for v in model.unobserved_RVs] + vars = [getattr(v.tag, "value_var", v) for v in model.unobserved_RVs] self.vars = vars self.varnames = [var.name for var in vars] self.fn = model.fastfn(vars) From 34a66c7142ba18750f092bbeef1d64594ad1a75a Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 10 Mar 2021 23:27:22 -0600 Subject: [PATCH 02/16] Use name from Variable.name in pymc3.util.get_var_name --- pymc3/util.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pymc3/util.py b/pymc3/util.py index 247eb380f3..459968aa28 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -22,8 +22,6 @@ import numpy as np import xarray -from aesara.tensor.var import TensorVariable - from pymc3.exceptions import SamplingError LATEX_ESCAPE_RE = re.compile(r"(%|_|\$|#|&)", re.MULTILINE) @@ -169,10 +167,7 @@ def get_repr_for_variable(variable, formatting="plain"): def get_var_name(var): """Get an appropriate, plain variable name for a variable.""" - if isinstance(var, TensorVariable): - return super(TensorVariable, var).__str__() - else: - return str(var) + return getattr(var, "name", str(var)) def update_start_vals(a, b, model): From 6c8a2b30dc70f5bda24c8f0bdaf047c7c1c66673 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Tue, 9 Mar 2021 16:41:52 -0600 Subject: [PATCH 03/16] Reinstate log-likelihood transforms --- pymc3/backends/base.py | 13 +- pymc3/distributions/__init__.py | 211 ++++++++++++++++------------ pymc3/distributions/continuous.py | 48 ++++--- pymc3/distributions/distribution.py | 16 +-- pymc3/distributions/transforms.py | 77 ---------- pymc3/model.py | 80 +++++++---- pymc3/tests/test_transforms.py | 75 +++++----- 7 files changed, 259 insertions(+), 261 deletions(-) diff --git a/pymc3/backends/base.py b/pymc3/backends/base.py index 114b83246a..866a9bbcdc 100644 --- a/pymc3/backends/base.py +++ b/pymc3/backends/base.py @@ -61,7 +61,18 @@ def __init__(self, name, model=None, vars=None, test_point=None): model = modelcontext(model) self.model = model if vars is None: - vars = [getattr(v.tag, "value_var", v) for v in model.unobserved_RVs] + vars = [] + for v in model.unobserved_RVs: + var = getattr(v.tag, "value_var", v) + transform = getattr(var.tag, "transform", None) + if transform: + # We need to create and add an un-transformed version of + # each transformed variable + untrans_var = transform.backward(var) + untrans_var.name = v.name + vars.append(untrans_var) + vars.append(var) + self.vars = vars self.varnames = [var.name for var in vars] self.fn = model.fastfn(vars) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index d33c63b073..bee51ca910 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
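[Note: patches 01 and 02 both swap a hard attribute lookup for a graceful fallback: the trace backend now falls back to the variable itself when no `value_var` has been tagged, and `get_var_name` simply prefers a variable's `name` attribute over its string representation. The following is a minimal, self-contained sketch of both fallbacks; `Tag` and `FakeVar` are hypothetical stand-ins for Aesara variables and their tag objects, not PyMC3 API.

    class Tag:
        pass

    class FakeVar:
        # Hypothetical stand-in for an Aesara variable with a `.tag`.
        def __init__(self, name, value_var=None):
            self.name = name
            self.tag = Tag()
            if value_var is not None:
                self.tag.value_var = value_var

    v_plain = FakeVar("x")
    v_valued = FakeVar("y", value_var=FakeVar("y_value"))

    # Mirrors `getattr(v.tag, "value_var", v)` from patch 01: use the tagged
    # value variable when present, otherwise the variable itself.
    assert getattr(v_plain.tag, "value_var", v_plain) is v_plain
    assert getattr(v_valued.tag, "value_var", v_valued).name == "y_value"

    # Mirrors the simplified `get_var_name` from patch 02.
    def get_var_name(var):
        return getattr(var, "name", str(var))

    assert get_var_name(v_plain) == "x"
    assert get_var_name(3.0) == "3.0"
]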
from functools import singledispatch +from itertools import chain from typing import Generator, List, Optional, Tuple, Union import aesara.tensor as aet @@ -31,6 +32,11 @@ ] +@singledispatch +def logp_transform(op, inputs): + return None + + def _get_scaling(total_size, shape, ndim): """ Gets scaling constant for logp @@ -135,7 +141,6 @@ def change_rv_size( def rv_log_likelihood_args( rv_var: TensorVariable, - rv_value: Optional[TensorVariable] = None, transformed: Optional[bool] = True, ) -> Tuple[TensorVariable, TensorVariable]: """Get a `RandomVariable` and its corresponding log-likelihood `TensorVariable` value. @@ -146,38 +151,24 @@ def rv_log_likelihood_args( A variable corresponding to a `RandomVariable`, whether directly or indirectly (e.g. an observed variable that's the output of an `Observed` `Op`). - rv_value - The measure-space input `TensorVariable` (i.e. "input" to a - log-likelihood). transformed When ``True``, return the transformed value var. Returns ======= The first value in the tuple is the `RandomVariable`, and the second is the - measure-space variable that corresponds with the latter. The first is used - to determine the log likelihood graph and the second is the "input" - parameter to that graph. In the case of an observed `RandomVariable`, the - "input" is actual data; in all other cases, it's just another - `TensorVariable`. + measure-space variable that corresponds with the latter (i.e. the "value" + variable). """ - if rv_value is None: - if rv_var.owner and isinstance(rv_var.owner.op, Observed): - rv_var, rv_value = rv_var.owner.inputs - elif hasattr(rv_var.tag, "value_var"): - rv_value = rv_var.tag.value_var - else: - return rv_var, None - - rv_value = aet.as_tensor_variable(rv_value) - - transform = getattr(rv_value.tag, "transform", None) - if transformed and transform: - rv_value = transform.forward(rv_value) - - return rv_var, rv_value + if rv_var.owner and isinstance(rv_var.owner.op, Observed): + return tuple(rv_var.owner.inputs) + elif hasattr(rv_var.tag, "value_var"): + rv_value = rv_var.tag.value_var + return rv_var, rv_value + else: + return rv_var, None def rv_ancestors(graphs: List[TensorVariable]) -> Generator[TensorVariable, None, None]: @@ -197,22 +188,53 @@ def strip_observed(x: TensorVariable) -> TensorVariable: return x -def sample_to_measure_vars(graphs: List[TensorVariable]) -> List[TensorVariable]: - """Replace `RandomVariable` terms in graphs with their measure-space counterparts.""" +def sample_to_measure_vars( + graphs: List[TensorVariable], +) -> Tuple[List[TensorVariable], List[TensorVariable]]: + """Replace sample-space variables in graphs with their measure-space counterparts. + + Sample-space variables are `TensorVariable` outputs of `RandomVariable` + `Op`s. Measure-space variables are `TensorVariable`s that correspond to + the value of a sample-space variable in a likelihood function (e.g. ``x`` + in ``p(X = x)``, where ``X`` is the corresponding sample-space variable). + (``x`` is also the variable found in ``rv_var.tag.value_var``, so this + function could also be called ``sample_to_value_vars``.) + + Parameters + ========== + graphs + The graphs in which random variables are to be replaced by their + measure variables. + + Returns + ======= + Tuple containing the transformed graphs and a ``dict`` of the replacements + that were made. 
+ """ replace = {} - for anc in rv_ancestors(graphs): - measure_var = getattr(anc.tag, "value_var", None) - if measure_var is not None: - replace[anc] = measure_var + for anc in chain(rv_ancestors(graphs), graphs): + + if not (anc.owner and isinstance(anc.owner.op, RandomVariable)): + continue + + _, value_var = rv_log_likelihood_args(anc) + + if value_var is not None: + replace[anc] = value_var + + if replace: + measure_graphs = clone_replace(graphs, replace=replace) + else: + measure_graphs = graphs - dist_params = clone_replace(graphs, replace=replace) - return dist_params + return measure_graphs, replace def logpt( rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, - jacobian: bool = True, + jacobian: Optional[bool] = True, + transformed: Optional[bool] = True, scaling: Optional[bool] = True, **kwargs, ) -> TensorVariable: @@ -228,15 +250,26 @@ def logpt( rv_var The `RandomVariable` output that determines the log-likelihood graph. rv_value - The input variable for the log-likelihood graph. + The input variable for the log-likelihood graph. If `rv_value` is + a transformed variable, its transformations will be applied. + If no value is provided, `rv_var.tag.value_var` will be checked and, + when available, used. jacobian Whether or not to include the Jacobian term. + transformed + Return the transformed version of the log-likelihood graph. scaling A scaling term to apply to the generated log-likelihood graph. """ - rv_var, rv_value = rv_log_likelihood_args(rv_var, rv_value) + rv_var, rv_value_var = rv_log_likelihood_args(rv_var) + + if rv_value is None: + rv_value = rv_value_var + else: + rv_value = aet.as_tensor(rv_value) + rv_node = rv_var.owner if not rv_node: @@ -244,13 +277,13 @@ def logpt( if not isinstance(rv_node.op, RandomVariable): + # This will probably need another generic function... 
if isinstance(rv_node.op, (Subtensor, AdvancedSubtensor, AdvancedSubtensor1)): raise NotImplementedError("Missing value support is incomplete") # "Flatten" and sum an array of indexed RVs' log-likelihoods rv_var, missing_values = rv_node.inputs - rv_value = rv_var.tag.value_var missing_values = missing_values.data logp_var = aet.sum( @@ -268,28 +301,40 @@ def logpt( return aet.zeros_like(rv_var) + if rv_value_var is None: + raise NotImplementedError(f"The log-likelihood for {rv_var} is undefined") + + # This case should be reached when `rv_var` is either the result of an + # `Observed` or a `RandomVariable` `Op` rng, size, dtype, *dist_params = rv_node.inputs - dist_params = sample_to_measure_vars(dist_params) + dist_params, replacements = sample_to_measure_vars(dist_params) - if jacobian: - logp_var = _logp(rv_node.op, rv_value, *dist_params, **kwargs) - else: - logp_var = _logp_nojac(rv_node.op, rv_value, *dist_params, **kwargs) + logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs) - # Replace `RandomVariable` ancestors with their corresponding - # log-likelihood input variables - lik_replacements = [ - (v, v.tag.value_var) - for v in ancestors([logp_var]) - if v.owner and isinstance(v.owner.op, RandomVariable) and getattr(v.tag, "value_var", None) - ] + # If any of the measure vars are transformed measure-space variables + # (signified by having a `transform` value in their tags), then we apply + # the their transforms and add their Jacobians (when enabled) + if transformed: + logp_var = transform_logp( + logp_var, + tuple(replacements.values()) + (rv_value_var,), + ) + + transform = getattr(rv_value_var.tag, "transform", None) + + if transform and jacobian: + transformed_jacobian = transform.jacobian_det(rv_value_var) + if transformed_jacobian: + if logp_var.ndim > transformed_jacobian.ndim: + logp_var = logp_var.sum(axis=-1) + logp_var += transformed_jacobian - (logp_var,) = clone_replace([logp_var], replace=lik_replacements) + (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value}) if scaling: logp_var *= _get_scaling( - getattr(rv_var.tag, "total_size", None), rv_value.shape, rv_value.ndim + getattr(rv_var.tag, "total_size", None), rv_value_var.shape, rv_value_var.ndim ) if rv_var.name is not None: @@ -298,6 +343,25 @@ def logpt( return logp_var +def transform_logp(logp_var: TensorVariable, inputs: List[TensorVariable]) -> TensorVariable: + """Transform the inputs of a log-likelihood graph.""" + trans_replacements = {} + for measure_var in inputs: + + transform = getattr(measure_var.tag, "transform", None) + + if transform is None: + continue + + trans_rv_value = transform.backward(measure_var) + trans_replacements[measure_var] = trans_rv_value + + if trans_replacements: + (logp_var,) = clone_replace([logp_var], trans_replacements) + + return logp_var + + @singledispatch def _logp(op, value, *dist_params, **kwargs): """Create a log-likelihood graph. 
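[Note: the `@singledispatch` definition of `_logp` just above is the central extension point of this refactor: the log-density implementation is selected by the type of the `RandomVariable` `Op` that produced the variable. Below is a self-contained sketch of that dispatch mechanism; `NormalRVOp` is a hypothetical stand-in for an Aesara op class, and plain-float math replaces the tensor graph.

    import math
    from functools import singledispatch

    class NormalRVOp:
        # Hypothetical stand-in for an Aesara `RandomVariable` op class.
        pass

    @singledispatch
    def _logp(op, value, *dist_params, **kwargs):
        raise NotImplementedError(f"No logp registered for {type(op).__name__}")

    @_logp.register(NormalRVOp)
    def normal_logp(op, value, mu, sigma):
        # Plain-float normal log-density in place of an Aesara graph.
        return (
            -0.5 * ((value - mu) / sigma) ** 2
            - math.log(sigma)
            - 0.5 * math.log(2 * math.pi)
        )

    # Dispatch is driven by the op instance, as in `logpt` above.
    assert abs(_logp(NormalRVOp(), 0.0, 0.0, 1.0) + 0.9189385) < 1e-6
]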
@@ -310,10 +374,10 @@ def _logp(op, value, *dist_params, **kwargs): return aet.zeros_like(value) -def logcdf(rv_var, rv_value, **kwargs): +def logcdf(rv_var, rv_value, transformed=True, jacobian=True, **kwargs): """Create a log-CDF graph.""" - rv_var, rv_value = rv_log_likelihood_args(rv_var, rv_value) + rv_var, rv_value = rv_log_likelihood_args(rv_var) rv_node = rv_var.owner if not rv_node: @@ -321,9 +385,16 @@ def logcdf(rv_var, rv_value, **kwargs): rng, size, dtype, *dist_params = rv_node.inputs - dist_params = sample_to_measure_vars(dist_params) + dist_params, replacements = sample_to_measure_vars(dist_params) + + logp_var = _logcdf(rv_node.op, rv_value, *dist_params, **kwargs) - return _logcdf(rv_node.op, rv_value, *dist_params, **kwargs) + if transformed: + logp_var = transform_logp( + logp_var, tuple(replacements.values()) + (rv_value,), jacobian=jacobian + ) + + return logp_var @singledispatch @@ -338,38 +409,6 @@ def _logcdf(op, value, *args, **kwargs): raise NotImplementedError() -def logp_nojac(rv_var, rv_value=None, **kwargs): - """Create a graph of the log-likelihood that doesn't include the Jacobian.""" - - rv_var, rv_value = rv_log_likelihood_args(rv_var, rv_value) - rv_node = rv_var.owner - - if not rv_node: - raise TypeError() - - rng, size, dtype, *dist_params = rv_node.inputs - - dist_params = sample_to_measure_vars(dist_params) - - return _logp_nojac(rv_node.op, rv_value, **kwargs) - - -@singledispatch -def _logp_nojac(op, value, *args, **kwargs): - """Return the logp, but do not include a jacobian term for transforms. - - If we use different parametrizations for the same distribution, we - need to add the determinant of the jacobian of the transformation - to make sure the densities still describe the same distribution. - However, MAP estimates are not invariant with respect to the - parameterization, we need to exclude the jacobian terms in this case. - - This function should be overwritten in base classes for transformed - distributions. - """ - return logpt(op, value, *args, **kwargs) - - def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, **kwargs): """Return the sum of the logp values for the given observations. 
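[Note: `transform_logp` and the Jacobian branch in `logpt` above implement the standard change-of-variables rule: evaluate the base density at `transform.backward(y)`, then add the log-determinant of the Jacobian. A toy numeric sketch for the `log` transform, whose `jacobian_det(y)` is simply `y` (since its backward map is `exp`); `expon_logpdf` is an illustrative density written with plain floats, not PyMC3 code.

    import math

    def expon_logpdf(x, lam=1.5):
        # Log-density of Exponential(lam) on the positive reals.
        return math.log(lam) - lam * x if x > 0 else -math.inf

    def transformed_logpdf(y, lam=1.5):
        x = math.exp(y)   # transform.backward(y) for the `log` transform
        jac_det = y       # transform.jacobian_det(y) for the `log` transform
        return expon_logpdf(x, lam) + jac_det

    # The transformed density integrates to one over the unconstrained
    # space (quick left-Riemann check over y in [-10, 10]).
    step = 1e-3
    total = sum(
        math.exp(transformed_logpdf(-10 + i * step)) * step
        for i in range(20000)
    )
    assert abs(total - 1.0) < 1e-3
]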
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 6bd64540d0..65c7fee2c0 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -35,7 +35,7 @@ from scipy.interpolate import InterpolatedUnivariateSpline from pymc3.aesaraf import floatX -from pymc3.distributions import _logcdf, _logp, transforms +from pymc3.distributions import _logcdf, _logp, logp_transform, transforms from pymc3.distributions.dist_math import ( SplineWrapper, betaln, @@ -100,36 +100,41 @@ class PositiveContinuous(Continuous): """Base class for positive continuous distributions""" - default_transform = transforms.log - class UnitContinuous(Continuous): """Base class for continuous distributions on [0,1]""" - default_transform = transforms.logodds - class BoundedContinuous(Continuous): """Base class for bounded continuous distributions""" - default_transform = "auto" - def create_transform(transform="auto", lower=None, upper=None): +@logp_transform.register(PositiveContinuous) +def pos_cont_transform(op, inputs): + return transforms.log - lower = aet.as_tensor_variable(lower) if lower is not None else None - upper = aet.as_tensor_variable(upper) if upper is not None else None - if transform == "auto": - if lower is None and upper is None: - transform = None - elif lower is not None and upper is None: - transform = transforms.lowerbound(lower) - elif lower is None and upper is not None: - transform = transforms.upperbound(upper) - else: - transform = transforms.interval(lower, upper) +@logp_transform.register(UnitContinuous) +def unit_cont_transform(op, inputs): + return transforms.logodds + + +@logp_transform.register(BoundedContinuous) +def bounded_cont_transform(op, inputs): + _, _, _, lower, upper = inputs + lower = aet.as_tensor_variable(lower) if lower is not None else None + upper = aet.as_tensor_variable(upper) if upper is not None else None + + if lower is None and upper is None: + transform = None + elif lower is not None and upper is None: + transform = transforms.lowerbound(lower) + elif lower is None and upper is not None: + transform = transforms.upperbound(upper) + else: + transform = transforms.interval(lower, upper) - return transform + return transform def assert_negative_support(var, label, distname, value=-1e-6): @@ -229,11 +234,10 @@ def dist(cls, lower=0, upper=1, **kwargs): upper = aet.as_tensor_variable(floatX(upper)) # mean = (upper + lower) / 2.0 # median = self.mean + return super().dist([lower, upper], **kwargs) - transform = kwargs.pop("transform", cls.default_transform) - transform = cls.create_transform(transform, lower, upper) - return super().dist([lower, upper], transform=transform, **kwargs) +BoundedContinuous.register(UniformRV) @_logp.register(UniformRV) diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index 3265451b11..098662b28e 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -19,6 +19,7 @@ import types import warnings +from abc import ABC from typing import TYPE_CHECKING import dill @@ -57,11 +58,10 @@ class _Unpickling: pass -class Distribution: +class Distribution(ABC): """Statistical distribution""" rv_op = None - default_transform = None def __new__(cls, name, *args, **kwargs): try: @@ -93,19 +93,19 @@ def __new__(cls, name, *args, **kwargs): if "shape" in kwargs: raise DeprecationWarning("The `shape` keyword is deprecated; use `size`.") + transform = kwargs.pop("transform", None) + rv_out = cls.dist(*args, rng=rng, **kwargs) - 
return model.register_rv(rv_out, name, data, total_size, dims=dims) + return model.register_rv(rv_out, name, data, total_size, dims=dims, transform=transform) @classmethod def dist(cls, dist_params, **kwargs): - transform = kwargs.pop("transform", cls.default_transform) + testval = kwargs.pop("testval", None) rv_var = cls.rv_op(*dist_params, **kwargs) - rv_var.tag.transform = transform - if testval is not None: rv_var.tag.test_value = testval @@ -223,7 +223,6 @@ def __init__( dtype, testval=None, defaults=(), - transform=None, parent_dist=None, *args, **kwargs, @@ -270,9 +269,6 @@ def __init__(self, shape=(), dtype=None, defaults=("mode",), *args, **kwargs): if dtype != "int16" and dtype != "int64": raise TypeError("Discrete classes expect dtype to be int16 or int64.") - if kwargs.get("transform", None) is not None: - raise ValueError("Transformations for discrete distributions " "are not allowed.") - super().__init__(shape, dtype, defaults=defaults, *args, **kwargs) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 163b9ba872..3aa6b81ec0 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -18,15 +18,12 @@ import numpy as np from aesara.tensor.subtensor import advanced_set_subtensor1 -from aesara.tensor.type import TensorType from pymc3.aesaraf import floatX, gradient -from pymc3.distributions import distribution from pymc3.math import invlogit, logit, logsumexp __all__ = [ "Transform", - "transform", "stick_breaking", "logodds", "interval", @@ -112,80 +109,6 @@ def jacobian_det(self, x): return aet.log(aet.abs_(grad)) -class TransformedDistribution(distribution.Distribution): - """A distribution that has been transformed from one space into another.""" - - def __init__(self, dist, transform, *args, **kwargs): - """ - Parameters - ---------- - dist: Distribution - transform: Transform - args, kwargs - arguments to Distribution""" - forward = transform.forward - testval = forward(dist.default()) - - self.dist = dist - self.transform_used = transform - # XXX: `FreeRV` no longer exists - v = None # forward(FreeRV(name="v", distribution=dist)) - self.type = v.type - - super().__init__(v.shape.tag.test_value, v.dtype, testval, dist.defaults, *args, **kwargs) - - if transform.name == "stickbreaking": - b = np.hstack(((np.atleast_1d(self.shape) == 1)[:-1], False)) - # force the last dim not broadcastable - self.type = TensorType(v.dtype, b) - - def logp(self, x): - """ - Calculate log-probability of Transformed distribution at specified value. - - Parameters - ---------- - x: numeric - Value for which log-probability is calculated. - - Returns - ------- - TensorVariable - """ - logp_nojac = self.logp_nojac(x) - jacobian_det = self.transform_used.jacobian_det(x) - if logp_nojac.ndim > jacobian_det.ndim: - logp_nojac = logp_nojac.sum(axis=-1) - return logp_nojac + jacobian_det - - def logp_nojac(self, x): - """ - Calculate log-probability of Transformed distribution at specified value - without jacobian term for transforms. - - Parameters - ---------- - x: numeric - Value for which log-probability is calculated. 
- - Returns - ------- - TensorVariable - """ - return self.dist.logp(self.transform_used.backward(x)) - - def _repr_latex_(self, **kwargs): - # prevent TransformedDistributions from ending up in LaTeX representations - # of models - return None - - def _distr_parameters_for_repr(self): - return [] - - -transform = Transform - - class Log(ElemwiseTransform): name = "log" diff --git a/pymc3/model.py b/pymc3/model.py index da683ad4f5..9599f3b288 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -29,7 +29,7 @@ from aesara.compile.sharedvalue import SharedVariable from aesara.gradient import grad -from aesara.graph.basic import Variable +from aesara.graph.basic import Constant, Variable, graph_inputs from aesara.tensor.random.op import Observed, observed from aesara.tensor.var import TensorVariable from pandas import Series @@ -39,7 +39,7 @@ from pymc3.aesaraf import generator, gradient, hessian, inputvars from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.data import GenTensorVariable, Minibatch -from pymc3.distributions import change_rv_size, logpt, logpt_sum +from pymc3.distributions import change_rv_size, logp_transform, logpt, logpt_sum from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list from pymc3.memoize import WithMemoization @@ -893,7 +893,7 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): with self: free_RVs_logp = aet.sum( [ - aet.sum(logpt(var, getattr(var.tag, "value_var", None))) + aet.sum(logpt(var, getattr(var.tag, "value_var", None), transformed=True)) for var in self.free_RVs + self.potentials ] ) @@ -902,15 +902,19 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): costs = [free_RVs_logp, observed_RVs_logp] else: costs = [self.logpt] - varnames = [var.name for var in grad_vars] - extra_vars = [var for var in self.free_RVs if var.name not in varnames] + + input_vars = {i for i in graph_inputs(costs) if not isinstance(i, Constant)} + extra_vars = [var for var in self.free_RVs if var in input_vars] return ValueGradFunction(costs, grad_vars, extra_vars, **kwargs) @property def logpt(self): """Aesara scalar of log-probability of the model""" with self: - factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] + factors = [ + logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True) + for var in self.free_RVs + ] factors += [logpt_sum(obs) for obs in self.observed_RVs] factors += self.potentials logp_var = aet.sum([aet.sum(factor) for factor in factors]) @@ -924,12 +928,15 @@ def logpt(self): def logp_nojact(self): """Aesara scalar of log-probability of the model but without the jacobian if transformed Random Variable is presented. - Note that If there is no transformed variable in the model, logp_nojact + + Note that if there is no transformed variable in the model, logp_nojact will be the same as logpt as there is no need for Jacobian correction. 
""" with self: factors = [ - logpt_sum(var, getattr(var.tag, "value_var", None), jacobian=False) + logpt_sum( + var, getattr(var.tag, "value_var", None), jacobian=False, transformed=True + ) for var in self.free_RVs ] factors += [logpt_sum(obs, jacobian=False) for obs in self.observed_RVs] @@ -946,7 +953,10 @@ def varlogpt(self): """Aesara scalar of log-probability of the unobserved random variables (excluding deterministic).""" with self: - factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs] + factors = [ + logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True) + for var in self.free_RVs + ] return aet.sum(factors) @property @@ -1000,7 +1010,7 @@ def independent_vars(self): @property def test_point(self): """Test point used to check that the model doesn't generate errors""" - return Point(((var.tag.value_var, var.tag.test_value) for var in self.free_RVs), model=self) + return Point(((var, var.tag.test_value) for var in self.vars), model=self) @property def disc_vars(self): @@ -1042,7 +1052,7 @@ def add_coords(self, coords): else: self.coords[name] = coords[name] - def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): + def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, transform=None): """Register an (un)observed random variable with the model. Parameters @@ -1050,11 +1060,11 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): rv_var: TensorVariable name: str data: array_like (optional) - If data is provided, the variable is observed. If None, - the variable is unobserved. + If data is provided, the variable is observed. If None, + the variable is unobserved. total_size: scalar upscales logp of variable with ``coef = total_size/var.shape[0]`` - dims : tuple + dims: tuple Dimension names for the variable. Returns @@ -1074,17 +1084,24 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): # In all other cases, the role of the value variable is taken by # observed data. That's why value variables are only referenced in # this branch of the conditional. - value_var = rv_var.clone() - value_var.name = rv_var.name - rv_var.tag.value_var = value_var - self.free_RVs.append(rv_var) + value_var = rv_var.clone() - transform = rv_var.tag.transform - value_var.tag.transform = None - + transform = transform or logp_transform(rv_var.owner.op, rv_var.owner.inputs) if transform is not None: - self.deterministics.append(rv_var) + value_var.tag.transform = transform + value_var.name = f"{rv_var.name}_{transform.name}" + if aesara.config.compute_test_value != "off": + value_var.tag.test_value = transform.forward(value_var).tag.test_value + + # The transformed variable needs to be a named variable in the + # model, too + self.named_vars[value_var.name] = value_var + else: + value_var = rv_var.clone() + value_var.name = rv_var.name + + rv_var.tag.value_var = value_var elif isinstance(data, dict): @@ -1178,7 +1195,7 @@ def __getitem__(self, key): except KeyError: raise e - def makefn(self, outs, mode=None, *args, **kwargs): + def makefn(self, outs, mode=None, transformed=True, *args, **kwargs): """Compiles a Aesara function which returns ``outs`` and takes the variable ancestors of ``outs`` as inputs. 
@@ -1192,8 +1209,11 @@ def makefn(self, outs, mode=None, *args, **kwargs): Compiled Aesara function """ with self: + vars = [ + v if not transformed else getattr(v.tag, "transformed_var", v) for v in self.vars + ] return aesara.function( - self.vars, + vars, outs, allow_input_downcast=True, on_unused_input="ignore", @@ -1463,7 +1483,7 @@ def fastfn(outs, mode=None, model=None): return model.fastfn(outs, mode) -def Point(*args, filter_model_vars=True, **kwargs): +def Point(*args, filter_model_vars=False, **kwargs): """Build a point. Uses same args as dict() does. Filters out variables not in the model. All keys are strings. @@ -1608,6 +1628,13 @@ def make_obs_var( # We try to reuse the old test value rv_var.tag.test_value = np.broadcast_to(test_value, rv_var.tag.test_value.shape) + # An independent variable used as the generic log-likelihood input + # parameter (i.e. the measure-space counterpart to the sample-space + # variable `rv_var`). + value_var = rv_var.clone() + rv_var.tag.value_var = value_var + value_var.name = f"{rv_var.name}" + missing_values = None mask = getattr(data, "mask", None) if mask is not None: @@ -1623,9 +1650,7 @@ def make_obs_var( data = aet.set_subtensor(constant[mask.nonzero()], missing_values) # Now, we need log-likelihood-space terms for these missing values - value_var = rv_var.clone() value_var.name = f"{rv_var.name}_missing" - rv_var.tag.value_var = value_var elif sps.issparse(data): data = sparse.basic.as_sparse(data, name=name) @@ -1713,7 +1738,6 @@ def Potential(name, var, model=None): model = modelcontext(model) var.name = model.name_for(name) var.tag.scaling = None - var.tag.transform = None model.potentials.append(var) model.add_random_variable(var) return var diff --git a/pymc3/tests/test_transforms.py b/pymc3/tests/test_transforms.py index 740da7d129..a71af261f3 100644 --- a/pymc3/tests/test_transforms.py +++ b/pymc3/tests/test_transforms.py @@ -23,6 +23,7 @@ import pymc3.distributions.transforms as tr from pymc3.aesaraf import jacobian +from pymc3.distributions import logpt from pymc3.tests.checks import close_to, close_to_logical from pymc3.tests.helpers import SeededTest from pymc3.tests.test_distributions import ( @@ -209,7 +210,7 @@ def test_interval_near_boundary(): pm.Uniform("x", testval=x0, lower=lb, upper=ub) log_prob = model.check_test_point() - np.testing.assert_allclose(log_prob.values, np.array([-52.68])) + np.testing.assert_allclose(log_prob, np.array([-52.68])) def test_circular(): @@ -246,42 +247,40 @@ def test_chain(): class TestElementWiseLogp(SeededTest): - def build_model(self, distfam, params, shape, transform, testval=None): + def build_model(self, distfam, params, size, transform, testval=None): if testval is not None: testval = pm.floatX(testval) with pm.Model() as m: - distfam("x", shape=shape, transform=transform, testval=testval, **params) + distfam("x", size=size, transform=transform, testval=testval, **params) return m def check_transform_elementwise_logp(self, model): - x0 = model.deterministics[0] x = model.free_RVs[0] - assert x.ndim == x.logp_elemwiset.ndim + x0 = x.tag.value_var + assert x.ndim == logpt(x).ndim pt = model.test_point - array = np.random.randn(*pt[x.name].shape) - pt[x.name] = array - dist = x.distribution - logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) - jacob_det = dist.transform_used.jacobian_det(aesara.shared(array)) - assert x.logp_elemwiset.ndim == jacob_det.ndim + array = np.random.randn(*pt[x0.name].shape) + transform = x0.tag.transform + logp_nojac = logpt(x, 
transform.backward(array), jacobian=False) + jacob_det = transform.jacobian_det(aesara.shared(array)) + assert logpt(x).ndim == jacob_det.ndim elementwiselogp = logp_nojac + jacob_det - close_to(x.logp_elemwise(pt), elementwiselogp.eval(), tol) + close_to(logpt(x, array).eval(), elementwiselogp.eval(), tol) def check_vectortransform_elementwise_logp(self, model, vect_opt=0): - x0 = model.deterministics[0] x = model.free_RVs[0] - assert (x.ndim - 1) == x.logp_elemwiset.ndim + x0 = x.tag.value_var + assert (x.ndim - 1) == logpt(x).ndim pt = model.test_point - array = np.random.randn(*pt[x.name].shape) - pt[x.name] = array - dist = x.distribution - logp_nojac = x0.distribution.logp(dist.transform_used.backward(array)) - jacob_det = dist.transform_used.jacobian_det(aesara.shared(array)) - assert x.logp_elemwiset.ndim == jacob_det.ndim + array = np.random.randn(*pt[x0.name].shape) + transform = x0.tag.transform + logp_nojac = logpt(x, transform.backward(array)) + jacob_det = transform.jacobian_det(aesara.shared(array)) + assert logpt(x).ndim == jacob_det.ndim if vect_opt == 0: # the original distribution is univariate @@ -289,7 +288,7 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): else: elementwiselogp = logp_nojac + jacob_det # Hack to get relative tolerance - a = x.logp_elemwise(pt) + a = logpt(x, array).eval() b = elementwiselogp.eval() close_to(a, b, np.abs(0.5 * (a + b) * tol)) @@ -301,13 +300,15 @@ def check_vectortransform_elementwise_logp(self, model, vect_opt=0): (np.ones(3) * 10.0, (4, 3)), ], ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_half_normal(self, sd, shape): - model = self.build_model(pm.HalfNormal, {"sd": sd}, shape=shape, transform=tr.log) + model = self.build_model(pm.HalfNormal, {"sd": sd}, size=shape, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize("lam,shape", [(2.5, 2), (5.0, (2, 3)), (np.ones(3), (4, 3))]) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_exponential(self, lam, shape): - model = self.build_model(pm.Exponential, {"lam": lam}, shape=shape, transform=tr.log) + model = self.build_model(pm.Exponential, {"lam": lam}, size=shape, transform=tr.log) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( @@ -318,10 +319,9 @@ def test_exponential(self, lam, shape): (np.ones(3), np.ones(3), (4, 3)), ], ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_beta(self, a, b, shape): - model = self.build_model( - pm.Beta, {"alpha": a, "beta": b}, shape=shape, transform=tr.logodds - ) + model = self.build_model(pm.Beta, {"alpha": a, "beta": b}, size=shape, transform=tr.logodds) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( @@ -335,16 +335,17 @@ def test_beta(self, a, b, shape): def test_uniform(self, lower, upper, shape): interval = tr.Interval(lower, upper) model = self.build_model( - pm.Uniform, {"lower": lower, "upper": upper}, shape=shape, transform=interval + pm.Uniform, {"lower": lower, "upper": upper}, size=shape, transform=interval ) self.check_transform_elementwise_logp(model) @pytest.mark.parametrize( "mu,kappa,shape", [(0.0, 1.0, 2), (-0.5, 5.5, (2, 3)), (np.zeros(3), np.ones(3), (4, 3))] ) + @pytest.mark.xfail(reason="Distribution not refactored yet") def test_vonmises(self, mu, kappa, shape): model = self.build_model( - pm.VonMises, {"mu": mu, "kappa": kappa}, shape=shape, transform=tr.circular + pm.VonMises, {"mu": mu, "kappa": kappa}, size=shape, transform=tr.circular ) 
self.check_transform_elementwise_logp(model) @@ -352,14 +353,14 @@ def test_vonmises(self, mu, kappa, shape): "a,shape", [(np.ones(2), 2), (np.ones((2, 3)) * 0.5, (2, 3)), (np.ones(3), (4, 3))] ) def test_dirichlet(self, a, shape): - model = self.build_model(pm.Dirichlet, {"a": a}, shape=shape, transform=tr.stick_breaking) + model = self.build_model(pm.Dirichlet, {"a": a}, size=shape, transform=tr.stick_breaking) self.check_vectortransform_elementwise_logp(model, vect_opt=1) def test_normal_ordered(self): model = self.build_model( pm.Normal, {"mu": 0.0, "sd": 1.0}, - shape=3, + size=3, testval=np.asarray([-1.0, 1.0, 4.0]), transform=tr.ordered, ) @@ -378,7 +379,7 @@ def test_half_normal_ordered(self, sd, shape): model = self.build_model( pm.HalfNormal, {"sd": sd}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.log, tr.ordered]), ) @@ -390,7 +391,7 @@ def test_exponential_ordered(self, lam, shape): model = self.build_model( pm.Exponential, {"lam": lam}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.log, tr.ordered]), ) @@ -408,7 +409,7 @@ def test_beta_ordered(self, a, b, shape): model = self.build_model( pm.Beta, {"alpha": a, "beta": b}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.logodds, tr.ordered]), ) @@ -424,7 +425,7 @@ def test_uniform_ordered(self, lower, upper, shape): model = self.build_model( pm.Uniform, {"lower": lower, "upper": upper}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([interval, tr.ordered]), ) @@ -438,7 +439,7 @@ def test_vonmises_ordered(self, mu, kappa, shape): model = self.build_model( pm.VonMises, {"mu": mu, "kappa": kappa}, - shape=shape, + size=shape, testval=testval, transform=tr.Chain([tr.circular, tr.ordered]), ) @@ -457,7 +458,7 @@ def test_uniform_other(self, lower, upper, shape, transform): model = self.build_model( pm.Uniform, {"lower": lower, "upper": upper}, - shape=shape, + size=shape, testval=testval, transform=transform, ) @@ -473,6 +474,6 @@ def test_uniform_other(self, lower, upper, shape, transform): def test_mvnormal_ordered(self, mu, cov, shape): testval = np.sort(np.random.randn(*shape)) model = self.build_model( - pm.MvNormal, {"mu": mu, "cov": cov}, shape=shape, testval=testval, transform=tr.ordered + pm.MvNormal, {"mu": mu, "cov": cov}, size=shape, testval=testval, transform=tr.ordered ) self.check_vectortransform_elementwise_logp(model, vect_opt=1) From 71301933668586529f5d216876a1be88a300ce07 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Sat, 13 Mar 2021 23:28:17 -0600 Subject: [PATCH 04/16] Remove remaining v3 sampling code --- pymc3/distributions/posterior_predictive.py | 699 -------------------- pymc3/tests/test_data_container.py | 4 +- pymc3/tests/test_distributions_random.py | 2 +- pymc3/tests/test_modelcontext.py | 22 - pymc3/tests/test_posterior_predictive.py | 39 -- pymc3/tests/test_random.py | 187 ------ 6 files changed, 3 insertions(+), 950 deletions(-) delete mode 100644 pymc3/distributions/posterior_predictive.py delete mode 100644 pymc3/tests/test_posterior_predictive.py delete mode 100644 pymc3/tests/test_random.py diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py deleted file mode 100644 index 1125ae9357..0000000000 --- a/pymc3/distributions/posterior_predictive.py +++ /dev/null @@ -1,699 +0,0 @@ -from __future__ import annotations - -import contextvars -import logging -import numbers -import warnings - -from collections import UserDict -from contextlib import AbstractContextManager -from typing import TYPE_CHECKING, Any, Callable, Dict, List, cast, overload - -import aesara.graph.basic -import aesara.graph.fg -import numpy as np - -from aesara.compile.sharedvalue import SharedVariable -from aesara.graph.basic import Constant -from aesara.tensor.var import TensorVariable -from arviz import InferenceData -from typing_extensions import Literal, Protocol -from xarray import Dataset - -from pymc3.backends.base import MultiTrace -from pymc3.distributions.distribution import ( - _compile_aesara_function, - _DrawValuesContext, - _DrawValuesContextBlocker, - is_fast_drawable, - vectorized_ppc, -) -from pymc3.exceptions import IncorrectArgumentsError -from pymc3.model import ( - Model, - MultiObservedRV, - ObservedRV, - get_named_nodes_and_relations, - modelcontext, -) -from pymc3.util import chains_and_samples, dataset_to_point_list, get_var_name - -# Failing tests: -# test_mixture_random_shape::test_mixture_random_shape -# - -Point = Dict[str, np.ndarray] - - -class HasName(Protocol): - name: str - - -class _TraceDict(UserDict): - """This class extends the standard trace-based representation - of traces by adding some helpful attributes used in posterior predictive - sampling. - - Attributes - ~~~~~~~~~~ - varnames: list of strings""" - - varnames: list[str] - _len: int - data: Point - - def __init__( - self, - point_list: list[Point] | None = None, - multi_trace: MultiTrace | None = None, - dict_: Point | None = None, - ): - """""" - if multi_trace: - assert point_list is None and dict_ is None - self.data = {} - self._len = sum(len(multi_trace._straces[chain]) for chain in multi_trace.chains) - self.varnames = multi_trace.varnames - for vn in multi_trace.varnames: - self.data[vn] = multi_trace.get_values(vn) - if point_list is not None: - assert multi_trace is None and dict_ is None - self.varnames = varnames = list(point_list[0].keys()) - rep_values = [point_list[0][varname] for varname in varnames] - # translate the point list. - self._len = num_points = len(point_list) - - def arr_for(val): - if np.isscalar(val): - return np.ndarray(shape=(num_points,)) - elif isinstance(val, np.ndarray): - shp = (num_points,) + val.shape - return np.ndarray(shape=shp) - else: - raise TypeError( - "Illegal object %s of type %s as value of variable in point list." 
- % (val, type(val)) - ) - - self.data = {name: arr_for(val) for name, val in zip(varnames, rep_values)} - for i, point in enumerate(point_list): - for var, value in point.items(): - self.data[var][i] = value - if dict_ is not None: - assert point_list is None and multi_trace is None - self.data = dict_ - self.varnames = list(dict_.keys()) - self._len = dict_[self.varnames[0]].shape[0] - assert self.varnames is not None and self._len is not None and self.data is not None - - def __len__(self) -> int: - return self._len - - def _extract_slice(self, slc: slice) -> _TraceDict: - sliced_dict: Point = {} - - def apply_slice(arr: np.ndarray) -> np.ndarray: - if len(arr.shape) == 1: - return arr[slc] - else: - return arr[slc, :] - - for vn, arr in self.data.items(): - sliced_dict[vn] = apply_slice(arr) - return _TraceDict(dict_=sliced_dict) - - @overload - def __getitem__(self, item: str | HasName) -> np.ndarray: - ... - - @overload - def __getitem__(self, item: slice | int) -> _TraceDict: - ... - - def __getitem__(self, item): - if isinstance(item, str): - return super().__getitem__(item) - elif isinstance(item, slice): - return self._extract_slice(item) - elif isinstance(item, int): - return _TraceDict(dict_={k: np.atleast_1d(v[item]) for k, v in self.data.items()}) - elif hasattr(item, "name"): - return super().__getitem__(item.name) - else: - raise IndexError("Illegal index %s for _TraceDict" % str(item)) - - -def fast_sample_posterior_predictive( - trace: MultiTrace | Dataset | InferenceData | list[dict[str, np.ndarray]], - samples: int | None = None, - model: Model | None = None, - var_names: list[str] | None = None, - keep_size: bool = False, - random_seed=None, -) -> dict[str, np.ndarray]: - """Generate posterior predictive samples from a model given a trace. - - This is a vectorized alternative to the standard ``sample_posterior_predictive`` function. - It aims to be as compatible as possible with the original API, and is significantly - faster. Both posterior predictive sampling functions have some remaining issues, and - we encourage users to verify agreement across the results of both functions for the time - being. - - Parameters - ---------- - trace: MultiTrace, xarray.Dataset, InferenceData, or List of points (dictionary) - Trace generated from MCMC sampling. - samples: int, optional - Number of posterior predictive samples to generate. Defaults to one posterior predictive - sample per posterior sample, that is, the number of draws times the number of chains. It - is not recommended to modify this value; when modified, some chains may not be represented - in the posterior predictive sample. - model: Model (optional if in `with` context) - Model used to generate `trace` - var_names: Iterable[str] - List of vars to sample. - keep_size: bool, optional - Force posterior predictive sample to have the same shape as posterior and sample stats - data: ``(nchains, ndraws, ...)``. - random_seed: int - Seed for the random number generator. - - Returns - ------- - samples: dict - Dictionary with the variable names as keys, and values numpy arrays containing - posterior predictive samples. - """ - - ### Implementation note: primarily this function canonicalizes the arguments: - ### Establishing the model context, wrangling the number of samples, - ### Canonicalizing the trace argument into a _TraceDict object and fitting it - ### to the requested number of samples. Then it invokes posterior_predictive_draw_values - ### *repeatedly*. 
It does this repeatedly, because the trace argument is set up to be - ### the same as the number of samples. So if the number of samples requested is - ### greater than the number of samples in the trace parameter, we sample repeatedly. This - ### makes the shape issues just a little easier to deal with. - - if isinstance(trace, InferenceData): - nchains, ndraws = chains_and_samples(trace) - trace = dataset_to_point_list(trace.posterior) - elif isinstance(trace, Dataset): - nchains, ndraws = chains_and_samples(trace) - trace = dataset_to_point_list(trace) - elif isinstance(trace, MultiTrace): - nchains = trace.nchains - ndraws = len(trace) - else: - if keep_size: - # arguably this should be just a warning. - raise IncorrectArgumentsError( - "For keep_size, cannot identify chains and length from %s.", trace - ) - - model = modelcontext(model) - assert model is not None - - if model.potentials: - warnings.warn( - "The effect of Potentials on other parameters is ignored during posterior predictive sampling. " - "This is likely to lead to invalid or biased predictive samples.", - UserWarning, - ) - - with model: - - if keep_size and samples is not None: - raise IncorrectArgumentsError("Should not specify both keep_size and samples arguments") - - if isinstance(trace, list) and all(isinstance(x, dict) for x in trace): - _trace = _TraceDict(point_list=trace) - elif isinstance(trace, MultiTrace): - _trace = _TraceDict(multi_trace=trace) - else: - raise TypeError( - "Unable to generate posterior predictive samples from argument of type %s" - % type(trace) - ) - - len_trace = len(_trace) - - assert isinstance(_trace, _TraceDict) - - _samples: list[int] = [] - # temporary replacement for more complicated logic. - max_samples: int = len_trace - if samples is None or samples == max_samples: - _samples = [max_samples] - elif samples < max_samples: - warnings.warn( - "samples parameter is smaller than nchains times ndraws, some draws " - "and/or chains may not be represented in the returned posterior " - "predictive sample" - ) - # if this is less than the number of samples in the trace, take a slice and - # work with that. - _trace = _trace[slice(samples)] - _samples = [samples] - elif samples > max_samples: - full, rem = divmod(samples, max_samples) - _samples = (full * [max_samples]) + ([rem] if rem != 0 else []) - else: - raise IncorrectArgumentsError( - "Unexpected combination of samples (%s) and max_samples (%d)" - % (samples, max_samples) - ) - - if var_names is None: - vars = model.observed_RVs - else: - vars = [model[x] for x in var_names] - - if random_seed is not None: - np.random.seed(random_seed) - - if TYPE_CHECKING: - _ETPParent = UserDict[str, np.ndarray] # this is only processed by mypy - else: - # this is not seen by mypy but will be executed at runtime. 
- _ETPParent = UserDict - - class _ExtendableTrace(_ETPParent): - def extend_trace(self, trace: dict[str, np.ndarray]) -> None: - for k, v in trace.items(): - if k in self.data: - self.data[k] = np.concatenate((self.data[k], v)) - else: - self.data[k] = v - - ppc_trace = _ExtendableTrace() - for s in _samples: - strace = _trace if s == len_trace else _trace[slice(0, s)] - try: - values = posterior_predictive_draw_values(cast(List[Any], vars), strace, s) - new_trace: dict[str, np.ndarray] = {k.name: v for (k, v) in zip(vars, values)} - ppc_trace.extend_trace(new_trace) - except KeyboardInterrupt: - pass - - if keep_size: - return {k: ary.reshape((nchains, ndraws, *ary.shape[1:])) for k, ary in ppc_trace.items()} - # this gets us a Dict[str, np.ndarray] instead of my wrapped equiv. - return ppc_trace.data - - -def posterior_predictive_draw_values( - vars: list[Any], trace: _TraceDict, samples: int -) -> list[np.ndarray]: - with _PosteriorPredictiveSampler(vars, trace, samples, None) as sampler: - return sampler.draw_values() - - -class _PosteriorPredictiveSampler(AbstractContextManager): - """The process of posterior predictive sampling is quite complicated so this provides a central data store.""" - - # inputs - vars: list[Any] - trace: _TraceDict - samples: int - size: int | None # not supported! - - # other slots - logger: logging.Logger - - # for the search - evaluated: dict[int, np.ndarray] - symbolic_params: list[tuple[int, Any]] - - # set by make_graph... - leaf_nodes: dict[str, Any] - named_nodes_parents: dict[str, Any] - named_nodes_children: dict[str, Any] - _tok: contextvars.Token - - def __init__(self, vars, trace: _TraceDict, samples, model: Model | None, size=None): - if size is not None: - raise NotImplementedError( - "sample_posterior_predictive does not support the size argument at this time." - ) - assert vars is not None - self.vars = vars - self.trace = trace - self.samples = samples - self.size = size - self.logger = logging.getLogger("posterior_predictive") - - def __enter__(self) -> "_PosteriorPredictiveSampler": - self._tok = vectorized_ppc.set(posterior_predictive_draw_values) - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]: - vectorized_ppc.reset(self._tok) - return False - - def draw_values(self) -> list[np.ndarray]: - vars = self.vars - trace = self.trace - samples = self.samples - # size = self.size - params = dict(enumerate(vars)) - - with _DrawValuesContext() as context: - self.init() - self.make_graph() - - drawn = context.drawn_vars - - # Init givens and the stack of nodes to try to `_draw_value` from - givens = { - p.name: (p, v) - for (p, samples), v in drawn.items() - if getattr(p, "name", None) is not None - } - stack = list(self.leaf_nodes.values()) # A queue would be more appropriate - - while stack: - next_ = stack.pop(0) - if (next_, samples) in drawn: - # If the node already has a givens value, skip it - continue - elif isinstance(next_, (Constant, SharedVariable)): - # If the node is a aesara.tensor.TensorConstant or a - # aesara.tensor.sharedvar.SharedVariable, its value will be - # available automatically in _compile_aesara_function so - # we can skip it. Furthermore, if this node was treated as a - # TensorVariable that should be compiled by aesara in - # _compile_aesara_function, it would raise a `TypeError: - # ('Constants not allowed in param list', ...)` for - # TensorConstant, and a `TypeError: Cannot use a shared - # variable (...) as explicit input` for SharedVariable. 
- # ObservedRV and MultiObservedRV instances are ViewOPs - # of TensorConstants or SharedVariables, we must add them - # to the stack or risk evaluating deterministics with the - # wrong values (issue #3354) - stack.extend( - [ - node - for node in self.named_nodes_parents[next_] - if isinstance(node, (ObservedRV, MultiObservedRV)) - and (node, samples) not in drawn - ] - ) - continue - else: - # If the node does not have a givens value, try to draw it. - # The named node's children givens values must also be taken - # into account. - children = self.named_nodes_children[next_] - temp_givens = [givens[k] for k in givens if k in children] - try: - # This may fail for autotransformed RVs, which don't - # have the random method - value = self.draw_value(next_, trace=trace, givens=temp_givens) - assert isinstance(value, np.ndarray) - givens[next_.name] = (next_, value) - drawn[(next_, samples)] = value - except aesara.graph.fg.MissingInputError: - # The node failed, so we must add the node's parents to - # the stack of nodes to try to draw from. We exclude the - # nodes in the `params` list. - stack.extend( - [ - node - for node in self.named_nodes_parents[next_] - if node is not None and (node, samples) not in drawn - ] - ) - - # the below makes sure the graph is evaluated in order - # test_distributions_random::TestDrawValues::test_draw_order fails without it - # The remaining params that must be drawn are all hashable - to_eval: set[int] = set() - missing_inputs: set[int] = {j for j, p in self.symbolic_params} - - while to_eval or missing_inputs: - if to_eval == missing_inputs: - raise ValueError( - "Cannot resolve inputs for {}".format( - [get_var_name(trace.varnames[j]) for j in to_eval] - ) - ) - to_eval = set(missing_inputs) - missing_inputs = set() - for param_idx in to_eval: - param = vars[param_idx] - drawn = context.drawn_vars - if (param, samples) in drawn: - self.evaluated[param_idx] = drawn[(param, samples)] - else: - try: - if param in self.named_nodes_children: - for node in self.named_nodes_children[param]: - if node.name not in givens and (node, samples) in drawn: - givens[node.name] = ( - node, - drawn[(node, samples)], - ) - value = self.draw_value(param, trace=self.trace, givens=givens.values()) - assert isinstance(value, np.ndarray) - self.evaluated[param_idx] = drawn[(param, samples)] = value - givens[param.name] = (param, value) - except aesara.graph.fg.MissingInputError: - missing_inputs.add(param_idx) - return [self.evaluated[j] for j in params] - - def init(self) -> None: - """This method carries out the initialization phase of sampling - from the posterior predictive distribution. 
Notably it initializes the - ``_DrawValuesContext`` bookkeeping object and evaluates the "fast drawable" - parts of the model.""" - vars: list[Any] = self.vars - trace: _TraceDict = self.trace - samples: int = self.samples - leaf_nodes: dict[str, Any] - named_nodes_parents: dict[str, Any] - named_nodes_children: dict[str, Any] - - # initialization phase - context = _DrawValuesContext.get_context() - assert isinstance(context, _DrawValuesContext) - with context: - drawn = context.drawn_vars - evaluated: dict[int, Any] = {} - symbolic_params = [] - for i, var in enumerate(vars): - if is_fast_drawable(var): - evaluated[i] = self.draw_value(var) - continue - name = getattr(var, "name", None) - if (var, samples) in drawn: - evaluated[i] = drawn[(var, samples)] - # We filter out Deterministics by checking for `model` attribute - elif name is not None and hasattr(var, "model") and name in trace.varnames: - # param.name is in the trace. Record it as drawn and evaluated - drawn[(var, samples)] = evaluated[i] = trace[cast(str, name)] - else: - # param still needs to be drawn - symbolic_params.append((i, var)) - self.evaluated = evaluated - self.symbolic_params = symbolic_params - - def make_graph(self) -> None: - # Distribution parameters may be nodes which have named node-inputs - # specified in the point. Need to find the node-inputs, their - # parents and children to replace them. - symbolic_params = self.symbolic_params - self.leaf_nodes = {} - self.named_nodes_parents = {} - self.named_nodes_children = {} - for _, param in symbolic_params: - if hasattr(param, "name"): - # Get the named nodes under the `param` node - nn, nnp, nnc = get_named_nodes_and_relations(param) - self.leaf_nodes.update(nn) - # Update the discovered parental relationships - for k in nnp.keys(): - if k not in self.named_nodes_parents.keys(): - self.named_nodes_parents[k] = nnp[k] - else: - self.named_nodes_parents[k].update(nnp[k]) - # Update the discovered child relationships - for k in nnc.keys(): - if k not in self.named_nodes_children.keys(): - self.named_nodes_children[k] = nnc[k] - else: - self.named_nodes_children[k].update(nnc[k]) - - def draw_value(self, param, trace: _TraceDict | None = None, givens=None): - """Draw a set of random values from a distribution or return a constant. - - Parameters - ---------- - param: number, array like, aesara variable or pymc3 random variable - The value or distribution. Constants or shared variables - will be converted to an array and returned. Aesara variables - are evaluated. If `param` is a pymc3 random variable, draw - values from it and return that (as ``np.ndarray``), unless a - value is specified in the ``trace``. - trace: pm.MultiTrace, optional - A dictionary from pymc3 variable names to samples of their values - used to provide context for evaluating ``param``. - givens: dict, optional - A dictionary from aesara variables to their values. These values - are used to evaluate ``param`` if it is a aesara variable. - """ - samples = self.samples - - def random_sample( - meth: Callable[..., np.ndarray], - param, - point: _TraceDict, - size: int, - shape: tuple[int, ...], - ) -> np.ndarray: - val = meth(point=point, size=size) - try: - assert val.shape == (size,) + shape, ( - "Sampling from random of %s yields wrong shape" % param - ) - # error-quashing here is *extremely* ugly, but it seems to be what the logic in DensityDist wants. 
- except AssertionError as e: - if ( - hasattr(param, "distribution") - and hasattr(param.distribution, "wrap_random_with_dist_shape") - and not param.distribution.wrap_random_with_dist_shape - ): - pass - else: - raise e - - return val - - if isinstance(param, (numbers.Number, np.ndarray)): - return param - elif isinstance(param, Constant): - return param.value - elif isinstance(param, SharedVariable): - return param.get_value() - elif isinstance(param, (TensorVariable, MultiObservedRV)): - if hasattr(param, "model") and trace and param.name in trace.varnames: - return trace[param.name] - elif hasattr(param, "random") and param.random is not None: - model = modelcontext(None) - assert isinstance(model, Model) - shape: tuple[int, ...] = tuple(_param_shape(param, model)) - return random_sample(param.random, param, point=trace, size=samples, shape=shape) - elif ( - hasattr(param, "distribution") - and hasattr(param.distribution, "random") - and param.distribution.random is not None - ): - if hasattr(param, "observations"): - # shape inspection for ObservedRV - dist_tmp = param.distribution - try: - distshape: tuple[int, ...] = tuple(param.observations.shape.eval()) - except AttributeError: - distshape = tuple(param.observations.shape) - - dist_tmp.shape = distshape - try: - return random_sample( - dist_tmp.random, - param, - point=trace, - size=samples, - shape=distshape, - ) - except (ValueError, TypeError): - # reset shape to account for shape changes - # with aesara.shared inputs - dist_tmp.shape = () - # We want to draw values to infer the dist_shape, - # we don't want to store these drawn values to the context - with _DrawValuesContextBlocker(): - point = trace[0] if trace else None - temp_val = np.atleast_1d(dist_tmp.random(point=point, size=None)) - # if hasattr(param, 'name') and param.name == 'obs': - # import pdb; pdb.set_trace() - # Sometimes point may change the size of val but not the - # distribution's shape - if point and samples is not None: - temp_size = np.atleast_1d(samples) - if all(temp_val.shape[: len(temp_size)] == temp_size): - dist_tmp.shape = tuple(temp_val.shape[len(temp_size) :]) - else: - dist_tmp.shape = tuple(temp_val.shape) - # I am not sure why I need to do this, but I do in order to trim off a - # degenerate dimension [2019/09/05:rpg] - if dist_tmp.shape[0] == 1 and len(dist_tmp.shape) > 1: - dist_tmp.shape = dist_tmp.shape[1:] - return random_sample( - dist_tmp.random, - point=trace, - size=samples, - param=param, - shape=tuple(dist_tmp.shape), - ) - else: # has a distribution, but no observations - distshape = tuple(param.distribution.shape) - return random_sample( - meth=param.distribution.random, - param=param, - point=trace, - size=samples, - shape=distshape, - ) - # NOTE: I think the following is already vectorized. 
- else: - if givens: - variables, values = list(zip(*givens)) - else: - variables = values = [] - # We only truly care if the ancestors of param that were given - # value have the matching dshape and val.shape - param_ancestors = set( - aesara.graph.basic.ancestors([param], blockers=list(variables)) - ) - inputs = [ - (var, val) for var, val in zip(variables, values) if var in param_ancestors - ] - if inputs: - input_vars, input_vals = list(zip(*inputs)) - else: - input_vars = [] - input_vals = [] - func = _compile_aesara_function(param, input_vars) - if not input_vars: - assert input_vals == [] # AFAICT if there are now vars, there can't be vals - output = func(*input_vals) - if hasattr(output, "shape"): - val = np.repeat(np.expand_dims(output, 0), samples, axis=0) - else: - val = np.full(samples, output) - - else: - val = func(*input_vals) - # np.ndarray([func(*input_vals) for inp in zip(*input_vals)]) - return val - raise ValueError("Unexpected type in draw_value: %s" % type(param)) - - -def _param_shape(var_desig, model: Model) -> tuple[int, ...]: - if isinstance(var_desig, str): - v = model[var_desig] - else: - v = var_desig - if hasattr(v, "observations"): - try: - # To get shape of _observed_ data container `pm.Data` - # (wrapper for SharedVariable) we evaluate it. - shape = tuple(v.observations.shape.eval()) - except AttributeError: - shape = v.observations.shape - elif hasattr(v, "dshape"): - shape = v.dshape - else: - shape = v.tag.test_value.shape - if shape == (1,): - shape = tuple() - return shape diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index e61e38d3d3..c0fe7e82f3 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -151,7 +151,7 @@ def test_shared_scalar_as_rv_input(self): v = pm.Normal("v", mu=shared_var, size=1) np.testing.assert_allclose( - logpt(v, 5.0).eval(), + logpt(v, np.r_[5.0]).eval(), -0.91893853, rtol=1e-5, ) @@ -159,7 +159,7 @@ def test_shared_scalar_as_rv_input(self): shared_var.set_value(10.0) np.testing.assert_allclose( - logpt(v, 10.0).eval(), + logpt(v, np.r_[10.0]).eval(), -0.91893853, rtol=1e-5, ) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 0dd4e19280..95f379836a 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -29,7 +29,7 @@ import pymc3 as pm from pymc3.distributions.dist_math import clipped_beta_rvs -from pymc3.distributions.distribution import to_tuple +from pymc3.distributions.shape_utils import to_tuple from pymc3.exceptions import ShapeError from pymc3.tests.helpers import SeededTest from pymc3.tests.test_distributions import ( diff --git a/pymc3/tests/test_modelcontext.py b/pymc3/tests/test_modelcontext.py index b7d44ca63c..ba14f90921 100644 --- a/pymc3/tests/test_modelcontext.py +++ b/pymc3/tests/test_modelcontext.py @@ -17,10 +17,6 @@ from pytest import raises from pymc3 import Model, Normal -from pymc3.distributions.distribution import ( - _DrawValuesContext, - _DrawValuesContextBlocker, -) from pymc3.model import modelcontext @@ -78,24 +74,6 @@ def test_mixed_contexts(): with modelB: assert Model.get_context() == modelB assert modelcontext(None) == modelB - dvc = _DrawValuesContext() - with dvc: - assert Model.get_context() == modelB - assert modelcontext(None) == modelB - assert _DrawValuesContext.get_context() == dvc - dvcb = _DrawValuesContextBlocker() - with dvcb: - assert _DrawValuesContext.get_context() == dvcb - assert 
_DrawValuesContextBlocker.get_context() == dvcb - assert _DrawValuesContext.get_context() == dvc - assert _DrawValuesContextBlocker.get_context() is dvc - assert Model.get_context() == modelB - assert modelcontext(None) == modelB - assert _DrawValuesContext.get_context(error_if_none=False) is None - with raises(TypeError): - _DrawValuesContext.get_context() - assert Model.get_context() == modelB - assert modelcontext(None) == modelB assert Model.get_context() == modelA assert modelcontext(None) == modelA assert Model.get_context(error_if_none=False) is None diff --git a/pymc3/tests/test_posterior_predictive.py b/pymc3/tests/test_posterior_predictive.py deleted file mode 100644 index 7a19ac4a59..0000000000 --- a/pymc3/tests/test_posterior_predictive.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np - -import pymc3 as pm - -from pymc3.backends.ndarray import point_list_to_multitrace -from pymc3.distributions.posterior_predictive import _TraceDict - - -def test_translate_point_list(): - with pm.Model() as model: - mu = pm.Normal("mu", 0.0, 1.0) - a = pm.Normal("a", mu=mu, sigma=1, observed=0.0) - mt = point_list_to_multitrace([model.test_point], model) - assert isinstance(mt, pm.backends.base.MultiTrace) - assert {"mu"} == set(mt.varnames) - assert len(mt) == 1 - - -def test_build_TraceDict(): - with pm.Model() as model: - mu = pm.Normal("mu", 0.0, 1.0) - a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) - trace = pm.sample(chains=2, draws=500) - dict = _TraceDict(multi_trace=trace) - assert isinstance(dict, _TraceDict) - assert len(dict) == 1000 - np.testing.assert_array_equal(trace["mu"], dict["mu"]) - assert set(trace.varnames) == set(dict.varnames) == {"mu"} - - -def test_build_TraceDict_point_list(): - with pm.Model() as model: - mu = pm.Normal("mu", 0.0, 1.0) - a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) - dict = _TraceDict(point_list=[model.test_point]) - assert set(dict.varnames) == {"mu"} - assert len(dict) == 1 - assert len(dict["mu"]) == 1 - assert dict["mu"][0] == 0.0 diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py deleted file mode 100644 index f88e6f75f9..0000000000 --- a/pymc3/tests/test_random.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright 2020 The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import aesara -import aesara.tensor as aet -import numpy as np -import numpy.testing as npt -import pytest - -from numpy import random as nr - -import pymc3 as pm - -from pymc3.distributions.distribution import _draw_value, draw_values -from pymc3.tests.helpers import SeededTest - - -def test_draw_value(): - npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6]) - npt.assert_equal(_draw_value(np.array(5.0)), 5) - - npt.assert_equal(_draw_value(aet.constant([5.0, 6.0])), [5, 6]) - assert _draw_value(aet.constant(5)) == 5 - npt.assert_equal(_draw_value(2 * aet.constant([5.0, 6.0])), [10, 12]) - - val = aesara.shared(np.array([5.0, 6.0])) - npt.assert_equal(_draw_value(val), [5, 6]) - npt.assert_equal(_draw_value(2 * val), [10, 12]) - - a = aet.scalar("a") - a.tag.test_value = 6 - npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2) - - assert _draw_value(5) == 5 - assert _draw_value(5.0) == 5 - assert isinstance(_draw_value(5.0), type(5.0)) - assert isinstance(_draw_value(5), type(5)) - - with pm.Model(): - mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) - a = pm.Normal("a", mu=mu, sigma=5, shape=2) - - val1 = _draw_value(a) - val2 = _draw_value(a) - assert np.all(val1 != val2) - - with pytest.raises(ValueError) as err: - _draw_value([]) - err.match("Unexpected type") - - -class TestDrawValues: - def test_empty(self): - assert draw_values([]) == [] - - def test_vals(self): - npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6]) - npt.assert_equal(draw_values([np.array(5.0)])[0], 5) - - npt.assert_equal(draw_values([aet.constant([5.0, 6.0])])[0], [5, 6]) - assert draw_values([aet.constant(5)])[0] == 5 - npt.assert_equal(draw_values([2 * aet.constant([5.0, 6.0])])[0], [10, 12]) - - val = aesara.shared(np.array([5.0, 6.0])) - npt.assert_equal(draw_values([val])[0], [5, 6]) - npt.assert_equal(draw_values([2 * val])[0], [10, 12]) - - def test_simple_model(self): - with pm.Model(): - mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) - a = pm.Normal("a", mu=mu, sigma=5, shape=2) - - val1 = draw_values([a]) - val2 = draw_values([a]) - assert np.all(val1[0] != val2[0]) - - point = {"a": np.array([3.0, 4.0])} - npt.assert_equal(draw_values([a], point=point), [point["a"]]) - - def test_dep_vars(self): - with pm.Model(): - mu = 2 * aet.constant(np.array([5.0, 6.0])) + aesara.shared(np.array(5)) - sd = pm.HalfNormal("sd", shape=2) - tau = 1 / sd ** 2 - a = pm.Normal("a", mu=mu, tau=tau, shape=2) - - point = {"a": np.array([1.0, 2.0])} - npt.assert_equal(draw_values([a], point=point), [point["a"]]) - - val1 = draw_values([a])[0] - val2 = draw_values([a], point={"sd": np.array([2.0, 3.0])})[0] - val3 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] - val4 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] - - assert all( - [ - np.all(val1 != val2), - np.all(val1 != val3), - np.all(val1 != val4), - np.all(val2 != val3), - np.all(val2 != val4), - np.all(val3 != val4), - ] - ) - - def test_graph_constant(self): - # Issue 3595 pointed out that slice(None) can introduce - # aesara.graph.basic.Constant into the compute graph, which wasn't - # handled correctly by draw_values - n_d = 500 - n_x = 2 - n_y = 1 - n_g = 10 - g = np.random.randint(0, n_g, (n_d,)) # group - x = np.random.randint(0, n_x, (n_d,)) # x factor - with pm.Model(): - multi_dim_rv = pm.Normal("multi_dim_rv", mu=0, sd=1, shape=(n_x, n_g, n_y)) - indexed_rv = multi_dim_rv[x, g, :] - i = draw_values([indexed_rv]) - assert i is not None - - -class 
TestJointDistributionDrawValues(SeededTest): - def test_joint_distribution(self): - with pm.Model() as model: - a = pm.Normal("a", mu=0, sigma=100) - b = pm.Normal("b", mu=a, sigma=1e-8) - c = pm.Normal("c", mu=a, sigma=1e-8) - d = pm.Deterministic("d", b + c) - - # Expected RVs - N = 1000 - norm = np.random.randn(3, N) - eA = norm[0] * 100 - eB = eA + norm[1] * 1e-8 - eC = eA + norm[2] * 1e-8 - eD = eB + eC - - # Drawn RVs - nr.seed(self.random_seed) - # A, B, C, D = list(zip(*[draw_values([a, b, c, d]) for i in range(N)])) - A, B, C, D = draw_values([a, b, c, d], size=N) - A = np.array(A).flatten() - B = np.array(B).flatten() - C = np.array(C).flatten() - D = np.array(D).flatten() - - # Assert that the drawn samples match the expected values - assert np.allclose(eA, A) - assert np.allclose(eB, B) - assert np.allclose(eC, C) - assert np.allclose(eD, D) - - # Assert that A, B and C have the expected difference - assert np.all(np.abs(A - B) < 1e-6) - assert np.all(np.abs(A - C) < 1e-6) - assert np.all(np.abs(B - C) < 1e-6) - - # Marginal draws - mA = np.array([draw_values([a]) for i in range(N)]).flatten() - mB = np.array([draw_values([b]) for i in range(N)]).flatten() - mC = np.array([draw_values([c]) for i in range(N)]).flatten() - # Also test the with model context of draw_values - with model: - mD = np.array([draw_values([d]) for i in range(N)]).flatten() - - # Assert that the marginal distributions have different sample values - assert not np.all(np.abs(B - mB) < 1e-2) - assert not np.all(np.abs(C - mC) < 1e-2) - assert not np.all(np.abs(D - mD) < 1e-2) - - # Assert that the marginal distributions do not have high cross - # correlation - assert np.abs(np.corrcoef(mA, mB)[0, 1]) < 0.1 - assert np.abs(np.corrcoef(mA, mC)[0, 1]) < 0.1 - assert np.abs(np.corrcoef(mB, mC)[0, 1]) < 0.1 From 08393fb8dab9dc04fecd163a584161d6e2c97d02 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:51:05 -0500 Subject: [PATCH 05/16] Change logp_transform argument to the entire random variable --- pymc3/distributions/continuous.py | 8 ++++---- pymc3/model.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 65c7fee2c0..d21d390c51 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -110,18 +110,18 @@ class BoundedContinuous(Continuous): @logp_transform.register(PositiveContinuous) -def pos_cont_transform(op, inputs): +def pos_cont_transform(op, rv_var): return transforms.log @logp_transform.register(UnitContinuous) -def unit_cont_transform(op, inputs): +def unit_cont_transform(op, rv_var): return transforms.logodds @logp_transform.register(BoundedContinuous) -def bounded_cont_transform(op, inputs): - _, _, _, lower, upper = inputs +def bounded_cont_transform(op, rv_var): + _, _, _, lower, upper = rv_var.owner.inputs lower = aet.as_tensor_variable(lower) if lower is not None else None upper = aet.as_tensor_variable(upper) if upper is not None else None diff --git a/pymc3/model.py b/pymc3/model.py index 9599f3b288..54172ce854 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1087,7 +1087,8 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None, trans self.free_RVs.append(rv_var) value_var = rv_var.clone() - transform = transform or logp_transform(rv_var.owner.op, rv_var.owner.inputs) + transform = transform or logp_transform(rv_var.owner.op, rv_var) + if transform is not None: value_var.tag.transform = transform value_var.name = f"{rv_var.name}_{transform.name}" From fdfb3006ee05f94ad8fc093683691bdb302b745e Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 01:54:31 -0500 Subject: [PATCH 06/16] Remove logpt transformed option --- pymc3/distributions/__init__.py | 25 +++++++++---------------- pymc3/model.py | 16 ++++------------ 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index bee51ca910..1d55aa57d9 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -234,7 +234,6 @@ def logpt( rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, jacobian: Optional[bool] = True, - transformed: Optional[bool] = True, scaling: Optional[bool] = True, **kwargs, ) -> TensorVariable: @@ -256,8 +255,6 @@ def logpt( when available, used. jacobian Whether or not to include the Jacobian term. - transformed - Return the transformed version of the log-likelihood graph. scaling A scaling term to apply to the generated log-likelihood graph. 
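A minimal sketch of the call pattern this change leaves behind (illustrative only, not part of the patch): with the `transformed` keyword gone, whether `logpt` builds the log-likelihood on the transformed scale is decided solely by the `transform` tagged onto the value variable, and callers supply values on that scale. The example assumes a bounded variable whose value variable carries an interval transform, per the `BoundedContinuous` rule registered earlier in the series; the model and names are made up for illustration.

import numpy as np
import pymc3 as pm

from pymc3.distributions import logpt

with pm.Model() as m:
    x = pm.Uniform("x", lower=0.0, upper=1.0)

# The measure-space ("value") variable and the transform tagged onto it
x_value = x.tag.value_var
transform = getattr(x_value.tag, "transform", None)  # interval transform here

# Values are supplied on the transformed scale; the Jacobian term is
# included by default and can be dropped with `jacobian=False`.
value = transform.forward(np.r_[0.5]).eval()
print(logpt(x, value).eval())
print(logpt(x, value, jacobian=False).eval())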
@@ -310,27 +307,28 @@ def logpt(

     dist_params, replacements = sample_to_measure_vars(dist_params)

-    logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs)
+    transform = getattr(rv_value_var.tag, "transform", None)

     # If any of the measure vars are transformed measure-space variables
     # (signified by having a `transform` value in their tags), then we apply
     # their transforms and add their Jacobians (when enabled)
-    if transformed:
+    if transform:
+        logp_var = _logp(rv_node.op, transform.backward(rv_value_var), *dist_params, **kwargs)
         logp_var = transform_logp(
             logp_var,
-            tuple(replacements.values()) + (rv_value_var,),
+            tuple(replacements.values()),
         )

-        transform = getattr(rv_value_var.tag, "transform", None)
-
-        if transform and jacobian:
+        if jacobian:
             transformed_jacobian = transform.jacobian_det(rv_value_var)
             if transformed_jacobian:
                 if logp_var.ndim > transformed_jacobian.ndim:
                     logp_var = logp_var.sum(axis=-1)
                 logp_var += transformed_jacobian
+    else:
+        logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs)

-    (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value})
+    (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value})

     if scaling:
         logp_var *= _get_scaling(
@@ -374,7 +372,7 @@ def _logp(op, value, *dist_params, **kwargs):
     return aet.zeros_like(value)


-def logcdf(rv_var, rv_value, transformed=True, jacobian=True, **kwargs):
+def logcdf(rv_var, rv_value, jacobian=True, **kwargs):
    """Create a log-CDF graph."""

     rv_var, rv_value = rv_log_likelihood_args(rv_var)
@@ -389,11 +387,6 @@ def logcdf(rv_var, rv_value, transformed=True, jacobian=True, **kwargs):

     logp_var = _logcdf(rv_node.op, rv_value, *dist_params, **kwargs)

-    if transformed:
-        logp_var = transform_logp(
-            logp_var, tuple(replacements.values()) + (rv_value,), jacobian=jacobian
-        )
-
     return logp_var


diff --git a/pymc3/model.py b/pymc3/model.py
index 54172ce854..f9a2abf52e 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -893,7 +893,7 @@ def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs):
         with self:
             free_RVs_logp = aet.sum(
                 [
-                    aet.sum(logpt(var, getattr(var.tag, "value_var", None), transformed=True))
+                    aet.sum(logpt(var, getattr(var.tag, "value_var", None)))
                     for var in self.free_RVs + self.potentials
                 ]
             )
@@ -911,10 +911,7 @@ def logpt(self):
         """Aesara scalar of log-probability of the model"""
         with self:
-            factors = [
-                logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True)
-                for var in self.free_RVs
-            ]
+            factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs]
             factors += [logpt_sum(obs) for obs in self.observed_RVs]
             factors += self.potentials
             logp_var = aet.sum([aet.sum(factor) for factor in factors])
@@ -934,9 +931,7 @@ def logp_nojact(self):
         """
         with self:
             factors = [
-                logpt_sum(
-                    var, getattr(var.tag, "value_var", None), jacobian=False, transformed=True
-                )
+                logpt_sum(var, getattr(var.tag, "value_var", None), jacobian=False)
                 for var in self.free_RVs
             ]
             factors += [logpt_sum(obs, jacobian=False) for obs in self.observed_RVs]
@@ -953,10 +948,7 @@ def varlogpt(self):
         """Aesara scalar of log-probability of the unobserved random variables
         (excluding deterministic)."""
         with self:
-            factors = [
-                logpt_sum(var, getattr(var.tag, "value_var", None), transformed=True)
-                for var in self.free_RVs
-            ]
+            factors = [logpt_sum(var, getattr(var.tag, "value_var", None)) for var in self.free_RVs]
             return aet.sum(factors)

     @property
From 
b0c51ed38c683ac040b576bc65870232e2581de4 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 01:55:52 -0500 Subject: [PATCH 07/16] Implement transform for Dirichlet --- pymc3/distributions/multivariate.py | 14 ++++++++++++-- pymc3/tests/test_distributions.py | 22 +++++++++++++++------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 650d1e1ad2..dfbff54cb4 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -40,7 +40,7 @@ import pymc3 as pm from pymc3.aesaraf import floatX, intX -from pymc3.distributions import _logp, transforms +from pymc3.distributions import _logp, logp_transform, transforms from pymc3.distributions.continuous import ChiSquared, Normal from pymc3.distributions.dist_math import bound, factln, logpow from pymc3.distributions.distribution import Continuous, Discrete @@ -459,7 +459,6 @@ class Dirichlet(Continuous): """ rv_op = dirichlet - default_transform = transforms.stick_breaking @classmethod def dist(cls, a, **kwargs): @@ -474,6 +473,17 @@ def _distr_parameters_for_repr(self): return ["a"] +@logp_transform.register(DirichletRV) +def dirichlet_transform(op, rv_var): + + if rv_var.ndim == 1 or rv_var.broadcastable[-1]: + # If this variable is just a bunch of scalars/degenerate + # Dirichlets, we can't transform it + return None + + return transforms.stick_breaking + + @_logp.register(DirichletRV) def dirichlet_logp(op, value, a): """ diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index aa5b755ed5..bb47bba6de 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1825,18 +1825,26 @@ def test_dirichlet(self, n): def test_dirichlet_with_batch_shapes(self, dist_shape): a = np.ones(dist_shape) with pm.Model() as model: - d = pm.Dirichlet("a", a=a) + d = pm.Dirichlet("d", a=a) - pymc3_res = logpt(d, d.tag.test_value).eval() + d_value = d.tag.value_var + d_point = d.eval() + if hasattr(d_value.tag, "transform"): + d_point_trans = d_value.tag.transform.forward(d_point).eval() + else: + d_point_trans = d_point + + pymc3_res = logpt(d, d_point_trans, jacobian=False).eval() + scipy_res = np.empty_like(pymc3_res) for idx in np.ndindex(a.shape[:-1]): - scipy_res = scipy.stats.dirichlet(a[idx]).logpdf(d.tag.test_value[idx]) - assert_almost_equal(pymc3_res[idx], scipy_res) + scipy_res[idx] = scipy.stats.dirichlet(a[idx]).logpdf(d_point[idx]) + + assert_almost_equal(pymc3_res, scipy_res) def test_dirichlet_shape(self): a = aet.as_tensor_variable(np.r_[1, 2]) - with pytest.warns(DeprecationWarning): - dir_rv = Dirichlet.dist(a) - assert dir_rv.shape == (2,) + dir_rv = Dirichlet.dist(a) + assert dir_rv.shape.eval() == (2,) with pytest.warns(DeprecationWarning), aesara.change_flags(compute_test_value="ignore"): dir_rv = Dirichlet.dist(aet.vector()) From d802a0e0de277333dfb61bbc82e79e6c7bddb52e Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:56:43 -0500 Subject: [PATCH 08/16] Always use the value var to initially build the log-likelihood --- pymc3/distributions/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index 1d55aa57d9..bdb63e0df2 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -267,6 +267,9 @@ def logpt( else: rv_value = aet.as_tensor(rv_value) + if rv_value_var is None: + rv_value_var = rv_value + rv_node = rv_var.owner if not rv_node: @@ -298,9 +301,6 @@ def logpt( return aet.zeros_like(rv_var) - if rv_value_var is None: - raise NotImplementedError(f"The log-likelihood for {rv_var} is undefined") - # This case should be reached when `rv_var` is either the result of an # `Observed` or a `RandomVariable` `Op` rng, size, dtype, *dist_params = rv_node.inputs @@ -375,12 +375,14 @@ def _logp(op, value, *dist_params, **kwargs): def logcdf(rv_var, rv_value, jacobian=True, **kwargs): """Create a log-CDF graph.""" - rv_var, rv_value = rv_log_likelihood_args(rv_var) + rv_var, _ = rv_log_likelihood_args(rv_var) rv_node = rv_var.owner if not rv_node: raise TypeError() + rv_value = aet.as_tensor(rv_value) + rng, size, dtype, *dist_params = rv_node.inputs dist_params, replacements = sample_to_measure_vars(dist_params) From df7df003593d60759d5578143ac85ecd1c61e87f Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 01:57:27 -0500 Subject: [PATCH 09/16] Register GammaRV to PositiveContinuous class --- pymc3/distributions/continuous.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index d21d390c51..2502877bab 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2536,6 +2536,9 @@ def _distr_parameters_for_repr(self): return ["alpha", "beta"] +PositiveContinuous.register(GammaRV) + + @_logp.register(GammaRV) def gamma_logp(op, value, alpha, beta): """ From 06284f2b5f3a479068b3d0fc18f581147ea6bb78 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:58:07 -0500 Subject: [PATCH 10/16] Add an option for negative support assertions in Normal and Gamma classes --- pymc3/distributions/continuous.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 2502877bab..72e2317ad5 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -466,7 +466,7 @@ class Normal(Continuous): rv_op = normal @classmethod - def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs): + def dist(cls, mu=0, sigma=None, tau=None, sd=None, no_assert=False, **kwargs): if sd is not None: sigma = sd tau, sigma = get_tau_sigma(tau=tau, sigma=sigma) @@ -477,7 +477,9 @@ def dist(cls, mu=0, sigma=None, tau=None, sd=None, **kwargs): # mean = median = mode = mu = aet.as_tensor_variable(floatX(mu)) # variance = 1.0 / self.tau - assert_negative_support(sigma, "sigma", "Normal") + if not no_assert: + assert_negative_support(sigma, "sigma", "Normal") + return super().dist([mu, sigma], **kwargs) @@ -2500,7 +2502,7 @@ class Gamma(PositiveContinuous): rv_op = gamma @classmethod - def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwargs): + def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, no_assert=False, **kwargs): if sd is not None: sigma = sd @@ -2511,10 +2513,11 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, sd=None, *args, **kwar # mode = aet.maximum((alpha - 1) / beta, 0) # variance = alpha / beta ** 2 - assert_negative_support(alpha, "alpha", "Gamma") - assert_negative_support(beta, "beta", "Gamma") + if not no_assert: + assert_negative_support(alpha, "alpha", "Gamma") + assert_negative_support(beta, "beta", "Gamma") - return super().dist([alpha, beta], **kwargs) + return super().dist([alpha, aet.inv(beta)], **kwargs) @classmethod def get_alpha_beta(cls, alpha=None, beta=None, mu=None, sigma=None): From 3458eebc3ea2a68e170ded5d938314db5dcc547a Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 01:59:28 -0500 Subject: [PATCH 11/16] Make logp testing work with transformed values --- pymc3/tests/test_distributions.py | 39 ++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index bb47bba6de..ac35edeb35 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -221,13 +221,16 @@ def product(domains, n_samples=-1): def build_model(distfam, valuedomain, vardomains, extra_args=None): if extra_args is None: extra_args = {} + with Model() as m: - vals = {} + param_vars = {} for v, dom in vardomains.items(): - vals[v] = dom.vals[0] - vals.update(extra_args) - distfam("value", size=valuedomain.shape, transform=None, **vals) - return m + v_at = aesara.shared(np.asarray(dom.vals[0])) + v_at.name = v + param_vars[v] = v_at + param_vars.update(extra_args) + distfam("value", **param_vars, transform=None) + return m, param_vars def laplace_asymmetric_logpdf(value, kappa, b, mu): @@ -606,14 +609,34 @@ def logp_reference(args): args.update(scipy_args) return scipy_logp(**args) - model = build_model(pymc3_dist, domain, paramdomains, extra_args) - logp = model.fastlogp + model, param_vars = build_model(pymc3_dist, domain, paramdomains, extra_args) + logp = model.fastlogp_nojac domains = paramdomains.copy() domains["value"] = domain for pt in product(domains, n_samples=n_samples): + pt = dict(pt) - pt_logp = Point(pt, model=model) + pt_d = {} + for k, v in pt.items(): + nv = param_vars.get(k, model.named_vars.get(k)) + nv = getattr(nv.tag, "value_var", nv) + + transform = getattr(nv.tag, "transform", None) + if transform: + # TODO: The compiled graph behind this should be cached and + # reused (if it isn't already). + v = transform.forward(v).eval() + + if nv.name in param_vars: + # Update the shared parameter variables in `param_vars` + param_vars[nv.name].set_value(v) + else: + # Create an argument entry for the (potentially + # transformed) "value" variable + pt_d[nv.name] = v + + pt_logp = Point(pt_d, model=model) pt_ref = Point(pt, filter_model_vars=False, model=model) assert_almost_equal( logp(pt_logp), From 376a711bf54bc751142cc041532507748274228c Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 02:00:11 -0500 Subject: [PATCH 12/16] Disable asserts during logp invalid range tests --- pymc3/tests/test_distributions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index ac35edeb35..c0a75cda5f 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -745,7 +745,12 @@ def check_logcdf( if invalid_edge is not None: test_params = valid_params.copy() # Shallow copy should be okay test_params[invalid_param] = invalid_edge - invalid_dist = pymc3_dist.dist(**test_params) + # We need to remove `Assert`s introduced by checks like + # `assert_negative_support` and disable test values; + # otherwise, we won't be able to create the + # `RandomVariable` + with aesara.config.change_flags(compute_test_value="off"): + invalid_dist = pymc3_dist.dist(no_assert=True, **test_params) with aesara.config.change_flags(mode=Mode("py")): assert_equal( logcdf(invalid_dist, valid_value).eval(), From b27f848c8d136750707e36cc46dbce655dfc0617 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard" Date: Mon, 15 Mar 2021 02:00:44 -0500 Subject: [PATCH 13/16] Update xfails in pymc3.tests.test_distributions --- pymc3/tests/test_distributions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index c0a75cda5f..9ef4f1aec9 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -985,6 +985,7 @@ def test_chi_squared(self): lambda value, nu: sp.chi2.logpdf(value, df=nu), ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.xfail( condition=(aesara.config.floatX == "float32"), reason="Poor CDF in SciPy. See scipy/scipy#869 for details.", @@ -1004,6 +1005,7 @@ def test_wald_scipy(self): lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha), ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize( "value,mu,lam,phi,alpha,logp", [ @@ -1023,7 +1025,6 @@ def test_wald_scipy(self): (50.0, 15.0, None, 0.666666, 10.0, -5.6481874), ], ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_wald(self, value, mu, lam, phi, alpha, logp): # Log probabilities calculated using the dIG function from the R package gamlss. # See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or @@ -1174,6 +1175,7 @@ def scipy_mu_alpha_logcdf(value, mu, alpha): n_samples=10, ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize( "mu, p, alpha, n, expected", [ @@ -1189,7 +1191,6 @@ def scipy_mu_alpha_logcdf(value, mu, alpha): (5, 0.5, None, 2, "Can't specify both mu and p."), ], ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_negative_binomial_init_fail(self, mu, p, alpha, n, expected): with Model(): with pytest.raises(ValueError, match=f"Incompatible parametrization. {expected}"): @@ -1592,6 +1593,7 @@ def test_zeroinflatedbinomial(self): n_samples=10, ) + @pytest.mark.xfail(reason="Distribution not refactored yet") @pytest.mark.parametrize("n", [1, 2, 3]) def test_mvnormal(self, n): self.check_logp( From 86af02346c64529fc520645b1451cce971a93498 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Mon, 15 Mar 2021 02:11:58 -0500 Subject: [PATCH 14/16] Fix Categorical logp implementation --- pymc3/distributions/discrete.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 4c428a017d..c49a49294c 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1354,7 +1354,7 @@ def dist(cls, p, **kwargs): @_logp.register(CategoricalRV) -def categorical_logp(op, value, p, upper): +def categorical_logp(op, value, p): r""" Calculate log-probability of Categorical distribution at specified value. @@ -1365,8 +1365,9 @@ def categorical_logp(op, value, p, upper): values are desired the values must be provided in a numpy array or `TensorVariable` """ + k = aet.shape(p)[-1] + p_ = p p = p_ / aet.sum(p_, axis=-1, keepdims=True) - k = aet.shape(p_)[-1] value_clip = aet.clip(value, 0, k - 1) if p.ndim > 1: From cc09a09d1679c8d695c5613bb64abb90cd2bb746 Mon Sep 17 00:00:00 2001 From: "Brandon T. 
Willard"
Date: Mon, 15 Mar 2021 02:13:11 -0500
Subject: [PATCH 15/16] Use the value var's values directly in logpt

---
 pymc3/distributions/__init__.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
index bdb63e0df2..f7bc508d7f 100644
--- a/pymc3/distributions/__init__.py
+++ b/pymc3/distributions/__init__.py
@@ -313,22 +313,20 @@ def logpt(

     dist_params, replacements = sample_to_measure_vars(dist_params)

-        logp_var = _logp(rv_node.op, transform.backward(rv_value_var), *dist_params, **kwargs)
+        logp_var = _logp(rv_node.op, transform.backward(rv_value), *dist_params, **kwargs)
         logp_var = transform_logp(
             logp_var,
             tuple(replacements.values()),
         )

         if jacobian:
-            transformed_jacobian = transform.jacobian_det(rv_value_var)
+            transformed_jacobian = transform.jacobian_det(rv_value)
             if transformed_jacobian:
                 if logp_var.ndim > transformed_jacobian.ndim:
                     logp_var = logp_var.sum(axis=-1)
                 logp_var += transformed_jacobian
     else:
-        logp_var = _logp(rv_node.op, rv_value_var, *dist_params, **kwargs)
-
-    (logp_var,) = clone_replace([logp_var], replace={rv_value_var: rv_value})
+        logp_var = _logp(rv_node.op, rv_value, *dist_params, **kwargs)

     if scaling:
         logp_var *= _get_scaling(
From 8977294d9da28708cf59d1f2ac71c0f96af2661b Mon Sep 17 00:00:00 2001
From: "Brandon T. Willard"
Date: Mon, 15 Mar 2021 02:14:46 -0500
Subject: [PATCH 16/16] Do not assume values are Applys in pymc3.tests.test_distributions

---
 pymc3/tests/test_distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 9ef4f1aec9..34671cf0f6 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -2740,6 +2740,6 @@ def test_hierarchical_logpt():
     # Make sure that hierarchical random variables are replaced with their
     # log-likelihood space variables in the log-likelihood
     logpt_ancestors = list(ancestors([m.logpt]))
-    assert not any(isinstance(v.owner.op, RandomVariable) for v in logpt_ancestors if v.owner)
+    assert not any(isinstance(v, RandomVariable) for v in logpt_ancestors)
     assert x.tag.value_var in logpt_ancestors
     assert y.tag.value_var in logpt_ancestors
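Taken together, the series leaves a model's log-probability graph built purely from measure-space (value) variables. A short sketch of the resulting invariants, mirroring the assertions in the final patch — the model below is illustrative only, and the `Gamma` transform assumes the `PositiveContinuous` registration from PATCH 09:

import pymc3 as pm

from aesara.graph.basic import ancestors
from aesara.tensor.random.op import RandomVariable

with pm.Model() as m:
    mu = pm.Normal("mu", 0.0, 1.0)
    x = pm.Gamma("x", alpha=2.0, beta=1.0)

# Each free variable carries its measure-space counterpart in its tag;
# transformed variables additionally carry the transform on the value
# variable itself.
x_value = x.tag.value_var
assert getattr(x_value.tag, "transform", None) is not None  # log transform

# Only value variables appear among the ancestors of the model's
# log-likelihood graph; no `RandomVariable`s remain.
logpt_ancestors = list(ancestors([m.logpt]))
assert not any(isinstance(v, RandomVariable) for v in logpt_ancestors)
assert mu.tag.value_var in logpt_ancestors
assert x_value in logpt_ancestors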