
Commit f571e5d

Allow registering XTensorVariables directly in model
1 parent 0e03123 commit f571e5d
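
The changes below touch the experimental dims module, the logprob machinery, and initial-point generation so that dims distributions can hand their XTensorVariable straight to Model.register_rv instead of lowering it with .values first. As a rough usage sketch (not part of the diff; the pymc.dims import path and the Normal signature here are assumptions for illustration):

import pymc as pm
import pymc.dims as pmd  # assumed import path for the experimental dims API

coords = {"a": range(3), "b": range(5)}
with pm.Model(coords=coords) as model:
    # The dims-aware Normal returns an XTensorVariable; after this commit it is
    # registered in the model as-is, without an explicit .values lowering step.
    x = pmd.Normal("x", 0, 1, dims=("a", "b"))
    prior = pm.sample_prior_predictive()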

File tree

9 files changed: +152 -32 lines


pymc/dims/__init__.py

Lines changed: 9 additions & 3 deletions
@@ -36,9 +36,15 @@ def __init__():
 
     # Make PyMC aware of xtensor functionality
     MeasurableOp.register(XRV)
-    lower_xtensor_query = optdb.query("+lower_xtensor")
-    logprob_rewrites_db.register("lower_xtensor", lower_xtensor_query, "basic", position=0.1)
-    initial_point_rewrites_db.register("lower_xtensor", lower_xtensor_query, "basic", position=0.1)
+    logprob_rewrites_db.register(
+        "pre_lower_xtensor", optdb.query("+lower_xtensor"), "basic", position=0.1
+    )
+    logprob_rewrites_db.register(
+        "post_lower_xtensor", optdb.query("+lower_xtensor"), "cleanup", position=5.1
+    )
+    initial_point_rewrites_db.register(
+        "lower_xtensor", optdb.query("+lower_xtensor"), "basic", position=0.1
+    )
 
     # TODO: Better model of probability of bugs
     day_of_conception = datetime.date(2025, 6, 17)

pymc/dims/distribution_core.py

Lines changed: 42 additions & 5 deletions
@@ -14,15 +14,20 @@
 from collections.abc import Callable, Sequence
 from itertools import chain
 
+from pytensor.graph import node_rewriter
 from pytensor.tensor.elemwise import DimShuffle
 from pytensor.xtensor import as_xtensor
+from pytensor.xtensor.basic import XTensorFromTensor, xtensor_from_tensor
 from pytensor.xtensor.type import XTensorVariable
 
 from pymc import modelcontext
 from pymc.dims.model import with_dims
-from pymc.distributions import transforms
+from pymc.dims.transforms import log_odds_transform, log_transform
 from pymc.distributions.distribution import _support_point, support_point
 from pymc.distributions.shape_utils import DimsWithEllipsis, convert_dims
+from pymc.logprob.abstract import MeasurableOp, _logprob
+from pymc.logprob.rewriting import measurable_ir_rewrites_db
+from pymc.logprob.utils import filter_measurable_variables
 from pymc.util import UNSET
 
 
@@ -34,6 +39,38 @@ def dimshuffle_support_point(ds_op, _, rv):
     return ds_op(support_point(rv))
 
 
+@_support_point.register(XTensorFromTensor)
+def xtensor_from_tensor_support_point(xtensor_op, _, rv):
+    # We remove the xtensor_from_tensor operation, so initial_point doesn't have to do a further lowering
+    return xtensor_op(support_point(rv))
+
+
+class MeasurableXTensorFromTensor(MeasurableOp, XTensorFromTensor):
+    pass
+
+
+@node_rewriter([XTensorFromTensor])
+def find_measurable_xtensor_from_tensor(fgraph, node) -> list[XTensorVariable] | None:
+    if isinstance(node.op, MeasurableXTensorFromTensor):
+        return None
+
+    if not filter_measurable_variables(node.inputs):
+        return None
+
+    return [MeasurableXTensorFromTensor(dims=node.op.dims)(*node.inputs)]
+
+
+@_logprob.register(MeasurableXTensorFromTensor)
+def measurable_xtensor_from_tensor(op, values, rv, **kwargs):
+    rv_logp = _logprob(rv.owner.op, tuple(v.values for v in values), *rv.owner.inputs, **kwargs)
+    return xtensor_from_tensor(rv_logp, dims=op.dims)
+
+
+measurable_ir_rewrites_db.register(
+    "measurable_xtensor_from_tensor", find_measurable_xtensor_from_tensor, "basic", "xtensor"
+)
+
+
 class DimDistribution:
     """Base class for PyMC distribution that wrap pytensor.xtensor.random operations, and follow xarray-like semantics."""
 
@@ -117,10 +154,10 @@ def __new__(
         else:
            # Align observed dims with those of the RV
            # TODO: If this fails give a more informative error message
-            observed = observed.transpose(*rv_dims).values
+            observed = observed.transpose(*rv_dims)
 
        rv = model.register_rv(
-            rv.values,
+            rv,
            name=name,
            observed=observed,
            total_size=total_size,
@@ -177,10 +214,10 @@ def dist(self, *args, core_dims: str | Sequence[str] | None = None, **kwargs):
 class PositiveDimDistribution(DimDistribution):
     """Base class for positive continuous distributions."""
 
-    default_transform = transforms.log
+    default_transform = log_transform
 
 
 class UnitDimDistribution(DimDistribution):
     """Base class for unit-valued distributions."""
 
-    default_transform = transforms.logodds
+    default_transform = log_odds_transform
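
A small sketch of what the two new registrations buy (illustrative, not part of the diff): the support point of an xtensor-wrapped RV simply re-wraps the support point of the underlying tensor RV, and the measurable rewrite lets logprob derivation pass through xtensor_from_tensor in the same way.

import pymc as pm
from pymc.distributions.distribution import support_point
from pytensor.xtensor.basic import xtensor_from_tensor

raw = pm.Normal.dist(mu=2.0, shape=(3,))     # plain tensor RV
xrv = xtensor_from_tensor(raw, dims=("a",))  # XTensorFromTensor around it
# Dispatches to the registration above; an XTensorVariable expected to
# evaluate to [2., 2., 2.] since the Normal's support point is its mean.
sp = support_point(xrv)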

pymc/dims/transforms.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+# Copyright 2025 - present The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytensor.xtensor as ptx
+
+from pymc.logprob.transforms import Transform
+
+
+class LogTransform(Transform):
+    name = "log"
+
+    def forward(self, value, *inputs):
+        return ptx.math.log(value)
+
+    def backward(self, value, *inputs):
+        return ptx.math.exp(value)
+
+    def log_jac_det(self, value, *inputs):
+        return value
+
+
+log_transform = LogTransform()
+
+
+class LogOddsTransform(Transform):
+    name = "logodds"
+
+    def backward(self, value, *inputs):
+        return ptx.math.expit(value)
+
+    def forward(self, value, *inputs):
+        return ptx.math.log(value / (1 - value))
+
+    def log_jac_det(self, value, *inputs):
+        sigmoid_value = ptx.math.sigmoid(value)
+        return ptx.math.log(sigmoid_value) + ptx.math.log1p(-sigmoid_value)
+
+
+log_odds_transform = LogOddsTransform()
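
A quick numerical sanity check of the new transforms (a sketch, not part of the commit; it assumes .eval() lowers the xtensor graph with the default rewrites):

import numpy as np
import pytensor.tensor as pt
from pytensor.xtensor.basic import xtensor_from_tensor
from pymc.dims.transforms import log_odds_transform

p = xtensor_from_tensor(pt.as_tensor(np.array([0.2, 0.5, 0.9])), dims=("a",))
z = log_odds_transform.forward(p)        # logit(p), the unconstrained value
p_back = log_odds_transform.backward(z)  # expit(logit(p)) should recover p
np.testing.assert_allclose(p_back.eval(), [0.2, 0.5, 0.9])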

pymc/initial_point.py

Lines changed: 15 additions & 3 deletions
@@ -20,7 +20,8 @@
 import pytensor
 import pytensor.tensor as pt
 
-from pytensor.graph.basic import Constant, Variable
+from pytensor.compile.ops import TypeCastingOp
+from pytensor.graph.basic import Apply, Constant, Variable
 from pytensor.graph.fg import FunctionGraph
 from pytensor.graph.rewriting.db import RewriteDatabaseQuery, SequenceDB
 from pytensor.tensor.variable import TensorVariable
@@ -195,6 +196,14 @@ def inner(seed, *args, **kwargs):
     return make_seeded_function(func)
 
 
+class InitialPoint(TypeCastingOp):
+    def make_node(self, var):
+        return Apply(self, [var], [var.type()])
+
+
+initial_point_op = InitialPoint()
+
+
 def make_initial_point_expression(
     *,
     free_rvs: Sequence[TensorVariable],
@@ -235,6 +244,9 @@ def make_initial_point_expression(
 
     # Clone free_rvs so we don't modify the original graph
     initial_point_fgraph = FunctionGraph(outputs=free_rvs, clone=True)
+    # Wrap each rv in an initial_point Operation to avoid losing dependency between the RVs
+    replacements = tuple((rv, initial_point_op(rv)) for rv in initial_point_fgraph.outputs)
+    toposort_replace(initial_point_fgraph, replacements, reverse=True)
 
     # Apply any rewrites necessary to compute the initial points.
     initial_point_rewriter = initial_point_rewrites_db.query(initial_point_basic_query)
@@ -254,10 +266,10 @@
         if isinstance(strategy, str):
             if strategy == "support_point":
                 try:
-                    value = support_point(variable)
+                    value = support_point(variable.owner.inputs[0])
                 except NotImplementedError:
                     warnings.warn(
-                        f"Moment not defined for variable {variable} of type "
+                        f"support_point not defined for variable {variable} of type "
                         f"{variable.owner.op.__class__.__name__}, defaulting to "
                         f"a draw from the prior. This can lead to difficulties "
                        f"during tuning. You can manually define an initval or "

pymc/logprob/basic.py

Lines changed: 6 additions & 6 deletions
@@ -197,7 +197,7 @@ def normal_logp(value, mu, sigma):
     [ir_valued_var] = fgraph.outputs
     [ir_rv, ir_value] = ir_valued_var.owner.inputs
     expr = _logprob_helper(ir_rv, ir_value, **kwargs)
-    cleanup_ir([expr])
+    [expr] = cleanup_ir([expr])
     if warn_rvs:
         _warn_rvs_in_inferred_graph(expr)
     return expr
@@ -297,7 +297,7 @@ def normal_logcdf(value, mu, sigma):
     [ir_valued_rv] = fgraph.outputs
     [ir_rv, ir_value] = ir_valued_rv.owner.inputs
     expr = _logcdf_helper(ir_rv, ir_value, **kwargs)
-    cleanup_ir([expr])
+    [expr] = cleanup_ir([expr])
     if warn_rvs:
         _warn_rvs_in_inferred_graph(expr)
     return expr
@@ -379,7 +379,7 @@ def icdf(rv: TensorVariable, value: TensorLike, warn_rvs=True, **kwargs) -> Tens
     [ir_valued_rv] = fgraph.outputs
     [ir_rv, ir_value] = ir_valued_rv.owner.inputs
     expr = _icdf_helper(ir_rv, ir_value, **kwargs)
-    cleanup_ir([expr])
+    [expr] = cleanup_ir([expr])
     if warn_rvs:
         _warn_rvs_in_inferred_graph(expr)
     return expr
@@ -540,15 +540,15 @@ def conditional_logp(
             f"The logprob terms of the following value variables could not be derived: {missing_value_terms}"
         )
 
-    logprobs = list(values_to_logprobs.values())
-    cleanup_ir(logprobs)
+    values, logprobs = zip(*values_to_logprobs.items())
+    logprobs = cleanup_ir(logprobs)
 
     if warn_rvs:
         rvs_in_logp_expressions = _find_unallowed_rvs_in_graph(logprobs)
         if rvs_in_logp_expressions:
             warnings.warn(RVS_IN_JOINT_LOGP_GRAPH_MSG % rvs_in_logp_expressions, UserWarning)
 
-    return values_to_logprobs
+    return dict(zip(values, logprobs))
 
 
 def transformed_conditional_logp(
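
The cleanup rewrites can replace the output variables of the IR graph (for example when lowering xtensor ops), so callers now re-read the rewritten expressions returned by cleanup_ir instead of assuming the originals were rewritten in place. A short usage sketch of the unchanged public behaviour (illustrative, not from the diff):

import pymc as pm
from pymc.logprob.basic import conditional_logp

x_rv = pm.Normal.dist(0, 1)
x_vv = x_rv.clone()                  # value variable standing in for x_rv
logp_terms = conditional_logp({x_rv: x_vv})
x_logp = logp_terms[x_vv]            # the cleaned-up (post-rewrite) logp expression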

pymc/logprob/rewriting.py

Lines changed: 5 additions & 2 deletions
@@ -133,6 +133,8 @@ def remove_DiracDelta(fgraph, node):
 
 
 logprob_rewrites_basic_query = RewriteDatabaseQuery(include=["basic"])
+logprob_rewrites_cleanup_query = RewriteDatabaseQuery(include=["cleanup"])
+
 logprob_rewrites_db = SequenceDB()
 logprob_rewrites_db.name = "logprob_rewrites_db"
 
@@ -276,10 +278,11 @@ def construct_ir_fgraph(
     return fgraph
 
 
-def cleanup_ir(vars: Sequence[Variable]) -> None:
+def cleanup_ir(vars: Sequence[Variable]) -> Sequence[Variable]:
     fgraph = FunctionGraph(outputs=vars, clone=False)
-    ir_rewriter = logprob_rewrites_db.query(RewriteDatabaseQuery(include=["cleanup"]))
+    ir_rewriter = logprob_rewrites_db.query(logprob_rewrites_cleanup_query)
     ir_rewriter.rewrite(fgraph)
+    return fgraph.outputs
 
 
 def assume_valued_outputs(outputs: Sequence[TensorVariable]) -> Sequence[TensorVariable]:

pymc/model/core.py

Lines changed: 9 additions & 3 deletions
@@ -35,6 +35,8 @@
 from pytensor.compile import DeepCopyOp, Function, ProfileStats, get_mode
 from pytensor.compile.sharedvalue import SharedVariable
 from pytensor.graph.basic import Constant, Variable, ancestors, graph_inputs
+from pytensor.tensor import as_tensor
+from pytensor.tensor.math import variadic_add
 from pytensor.tensor.random.op import RandomVariable
 from pytensor.tensor.random.type import RandomType
 from pytensor.tensor.variable import TensorConstant, TensorVariable
@@ -232,7 +234,9 @@ def __init__(
            grads = pytensor.grad(cost, grad_vars, disconnected_inputs="ignore")
            for grad_wrt, var in zip(grads, grad_vars):
                grad_wrt.name = f"{var.name}_grad"
-            grads = pt.join(0, *[pt.atleast_1d(grad.ravel()) for grad in grads])
+            grads = pt.join(
+                0, *[as_tensor(grad, allow_xtensor_conversion=True).ravel() for grad in grads]
+            )
            outputs = [cost, grads]
        else:
            outputs = [cost]
@@ -702,7 +706,9 @@ def logp(
        if not sum:
            return logp_factors
 
-        logp_scalar = pt.sum([pt.sum(factor) for factor in logp_factors])
+        logp_scalar = variadic_add(
+            *(as_tensor(factor, allow_xtensor_conversion=True).sum() for factor in logp_factors)
+        )
        logp_scalar_name = "__logp" if jacobian else "__logp_nojac"
        if self.name:
            logp_scalar_name = f"{logp_scalar_name}_{self.name}"
@@ -1322,7 +1328,7 @@ def make_obs_var(
        else:
            if sps.issparse(data):
                data = sparse.basic.as_sparse(data, name=name)
-            else:
+            elif not isinstance(data, Variable):
                data = pt.as_tensor_variable(data, name=name)
 
        if total_size:
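
Sketch of the summation pattern now used in Model.logp (assumption: the installed PyTensor provides as_tensor(..., allow_xtensor_conversion=True) and variadic_add, as imported above): each factor, tensor or xtensor, is converted to a plain tensor before the scalar reduction, so mixed factor types can be added.

import pytensor.tensor as pt
from pytensor.tensor import as_tensor
from pytensor.tensor.math import variadic_add
from pytensor.xtensor.basic import xtensor_from_tensor

tensor_factor = pt.vector("t")                                        # ordinary logp factor
xtensor_factor = xtensor_from_tensor(pt.tensor("x", shape=(2, 3)), dims=("a", "b"))
logp_scalar = variadic_add(
    *(as_tensor(f, allow_xtensor_conversion=True).sum() for f in (tensor_factor, xtensor_factor))
)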

pymc/pytensorf.py

Lines changed: 16 additions & 9 deletions
@@ -45,7 +45,7 @@
 from pytensor.tensor.random.var import RandomGeneratorSharedVariable
 from pytensor.tensor.rewriting.basic import topo_unconditional_constant_folding
 from pytensor.tensor.rewriting.shape import ShapeFeature
-from pytensor.tensor.sharedvar import SharedVariable, TensorSharedVariable
+from pytensor.tensor.sharedvar import SharedVariable
 from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1
 from pytensor.tensor.variable import TensorVariable
 
@@ -299,7 +299,9 @@ def smarttypeX(x):
 
 def gradient1(f, v):
     """Flat gradient of f wrt v."""
-    return pt.flatten(grad(f, v, disconnected_inputs="warn"))
+    return pt.as_tensor(
+        grad(f, v, disconnected_inputs="warn"), allow_xtensor_conversion=True
+    ).ravel()
 
 
 empty_gradient = pt.zeros(0, dtype="float32")
@@ -418,11 +420,11 @@ def make_shared_replacements(point, vars, model):
 
 def join_nonshared_inputs(
     point: dict[str, np.ndarray],
-    outputs: list[TensorVariable],
-    inputs: list[TensorVariable],
-    shared_inputs: dict[TensorVariable, TensorSharedVariable] | None = None,
+    outputs: Sequence[Variable],
+    inputs: Sequence[Variable],
+    shared_inputs: dict[Variable, Variable] | None = None,
     make_inputs_shared: bool = False,
-) -> tuple[list[TensorVariable], TensorVariable]:
+) -> tuple[Sequence[Variable], TensorVariable]:
     """
     Create new outputs and input TensorVariables where the non-shared inputs are joined in a single raveled vector input.
 
@@ -547,7 +549,9 @@ def join_nonshared_inputs(
     if not inputs:
         raise ValueError("Empty list of input variables.")
 
-    raveled_inputs = pt.concatenate([var.ravel() for var in inputs])
+    raveled_inputs = pt.concatenate(
+        [pt.as_tensor(var, allow_xtensor_conversion=True).ravel() for var in inputs]
+    )
 
     if not make_inputs_shared:
         tensor_type = raveled_inputs.type
@@ -559,12 +563,15 @@
     if pytensor.config.compute_test_value != "off":
         joined_inputs.tag.test_value = raveled_inputs.tag.test_value
 
-    replace: dict[TensorVariable, TensorVariable] = {}
+    replace: dict[Variable, Variable] = {}
     last_idx = 0
     for var in inputs:
         shape = point[var.name].shape
         arr_len = np.prod(shape, dtype=int)
-        replace[var] = joined_inputs[last_idx : last_idx + arr_len].reshape(shape).astype(var.dtype)
+        replacement_var = (
+            joined_inputs[last_idx : last_idx + arr_len].reshape(shape).astype(var.dtype)
+        )
+        replace[var] = var.type.filter_variable(replacement_var)
         last_idx += arr_len
 
     if shared_inputs is not None:

tests/dims/test_model.py

Lines changed: 1 addition & 1 deletion
@@ -73,7 +73,7 @@ def test_simple_model():
     np.testing.assert_allclose(draw, draw_same)
     assert not np.allclose(draw, draw_diff)
 
-    observed_values = DataArray(np.ones((3, 5)), dims=("a", "b")).transpose()
+    observed_values = DataArray(np.ones((3, 5)), dims=("a", "b"))
     with observe(xmodel, {"y": observed_values}):
         pm.sample_prior_predictive()
         idata = pm.sample(
