Commit f6c8aac

Avoid reusing RNGs across distinct RandomVariables
This risk arises when resizing a RandomVariable whose new size depends on the size of the original RandomVariable: the reused RNG variables can end up with wrong update expressions that still depend on the original RandomVariable. A similar risk exists when clone-replacing RandomVariables, as the cloned variables will share the same RNGs. This happened when attempting to use Metropolis with Simulator variables.

* change_rv_size no longer reuses the old RNG
* compile_pymc raises if distinct update expressions are inferred for the same RNG
1 parent 6b5f33a commit f6c8aac
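
To see the failure mode being fixed, here is a minimal sketch (adapted from the test_multiple_updates_same_variable test added in this commit; assumes a PyMC development environment where aesara and pymc are importable):

import aesara
import aesara.tensor as at
import numpy as np

from pymc.aesaraf import compile_pymc

# Two distinct RandomVariables drawing from the same shared RNG: each draw
# needs its own update expression for that one variable, so no single
# update can be correct for both.
rng = aesara.shared(np.random.default_rng(), name="rng")
x = at.random.normal(rng=rng)
y = at.random.normal(rng=rng)

compile_pymc([], [x])  # fine on its own
compile_pymc([], [y])  # fine on its own
try:
    compile_pymc([], [x, y])  # now raises instead of silently keeping one update
except ValueError as err:
    print(err)  # Multiple update expressions found for the variable rng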

7 files changed (+127 -41 lines)

pymc/aesaraf.py

Lines changed: 38 additions & 4 deletions
@@ -864,6 +864,31 @@ def find_rng_nodes(
     ]
 
 
+def replace_rng_nodes(outputs: Sequence[TensorVariable]) -> Sequence[TensorVariable]:
+    """Replace any RNG nodes upstream of outputs by new RNGs of the same type.
+
+    This can be used when combining a pre-existing graph with a cloned one, to ensure
+    RNGs are unique across the two graphs.
+    """
+    rng_nodes = find_rng_nodes(outputs)
+
+    # Nothing to do here
+    if not rng_nodes:
+        return outputs
+
+    graph = FunctionGraph(outputs=outputs, clone=False)
+    new_rng_nodes: List[Union[np.random.RandomState, np.random.Generator]] = []
+    for rng_node in rng_nodes:
+        rng_cls: type
+        if isinstance(rng_node, at.random.var.RandomStateSharedVariable):
+            rng_cls = np.random.RandomState
+        else:
+            rng_cls = np.random.Generator
+        new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64())))
+    graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True)
+    return graph.outputs
+
+
 SeedSequenceSeed = Optional[Union[int, Sequence[int], np.ndarray, np.random.SeedSequence]]

@@ -944,12 +969,21 @@ def compile_pymc(
         assert random_var.owner.op is not None
         if isinstance(random_var.owner.op, RandomVariable):
             rng = random_var.owner.inputs[0]
-            if not hasattr(rng, "default_update"):
-                rng_updates[rng] = random_var.owner.outputs[0]
+            if hasattr(rng, "default_update"):
+                update_map = {rng: rng.default_update}
             else:
-                rng_updates[rng] = rng.default_update
+                update_map = {rng: random_var.owner.outputs[0]}
         else:
-            rng_updates.update(random_var.owner.op.update(random_var.owner))
+            update_map = random_var.owner.op.update(random_var.owner)
+        # Check that we are not setting different update expressions for the same variables
+        for rng, update in update_map.items():
+            if rng not in rng_updates:
+                rng_updates[rng] = update
+            # When a variable has multiple outputs, it will be called twice with the same
+            # update expression. We don't want to raise in that case, only if the update
+            # expression is different from the one already registered
+            elif rng_updates[rng] is not update:
+                raise ValueError(f"Multiple update expressions found for the variable {rng}")
 
     # We always reseed random variables as this provides RNGs with no chances of collision
     if rng_updates:
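
A quick sketch of how the new helper is meant to be used (adapted from the test_replace_rng_nodes test added later in this diff; assumes a PyMC development environment):

import aesara
import aesara.tensor as at
import numpy as np

from pymc.aesaraf import replace_rng_nodes

rng = aesara.shared(np.random.default_rng())
x = at.random.normal(rng=rng)

# Cloning a node copies its RNG input by reference
cloned_x = x.owner.clone().default_output()
assert cloned_x.owner.inputs[0] is rng

# replace_rng_nodes rewires the graph in place to a fresh RNG of the same type
(cloned_x,) = replace_rng_nodes([cloned_x])
assert cloned_x.owner.inputs[0] is not rng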

pymc/distributions/shape_utils.py

Lines changed: 19 additions & 12 deletions
@@ -17,6 +17,8 @@
 A collection of common shape operations needed for broadcasting
 samples from probability distributions for stochastic nodes in PyMC.
 """
+import warnings
+
 from functools import singledispatch
 from typing import Optional, Sequence, Tuple, Union

@@ -579,8 +581,8 @@ def change_dist_size(
     Returns
     -------
     A new distribution variable that is equivalent to the original distribution with
-    the new size. The new distribution may reuse the same RandomState/Generator inputs
-    as the original distribution.
+    the new size. The new distribution will not reuse the old RandomState/Generator
+    input, so it will be independent of the original distribution.
 
     Examples
     --------

@@ -618,24 +620,29 @@
 def change_rv_size(op, rv, new_size, expand) -> TensorVariable:
     # Extract the RV node that is to be resized
     rv_node = rv.owner
-    rng, size, dtype, *dist_params = rv_node.inputs
+    old_rng, old_size, dtype, *dist_params = rv_node.inputs
 
     if expand:
-        shape = tuple(rv_node.op._infer_shape(size, dist_params))
-        size = shape[: len(shape) - rv_node.op.ndim_supp]
-        new_size = tuple(new_size) + tuple(size)
+        shape = tuple(rv_node.op._infer_shape(old_size, dist_params))
+        old_size = shape[: len(shape) - rv_node.op.ndim_supp]
+        new_size = tuple(new_size) + tuple(old_size)
 
     # Make sure the new size is a tensor. This dtype-aware conversion helps
     # to not unnecessarily pick up a `Cast` in some cases (see #4652).
     new_size = at.as_tensor(new_size, ndim=1, dtype="int64")
 
-    new_rv_node = rv_node.op.make_node(rng, new_size, dtype, *dist_params)
-    new_rv = new_rv_node.outputs[-1]
+    new_rv = rv_node.op(*dist_params, size=new_size, dtype=dtype)
 
-    # Update "traditional" rng default_update, if that was set for old RV
-    default_update = getattr(rng, "default_update", None)
-    if default_update is not None and default_update is rv_node.outputs[0]:
-        rng.default_update = new_rv_node.outputs[0]
+    # Replicate "traditional" rng default_update, if that was set for old_rng
+    default_update = getattr(old_rng, "default_update", None)
+    if default_update is not None:
+        if default_update is rv_node.outputs[0]:
+            new_rv.owner.inputs[0].default_update = new_rv.owner.outputs[0]
+        else:
+            warnings.warn(
+                f"Update expression of {rv} RNG could not be replicated in resized variable",
+                UserWarning,
+            )
 
     return new_rv
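
A short sketch of the new change_dist_size behaviour (mirroring the updated test_change_rv_size shown later in this diff; assumes a PyMC development environment):

import aesara
import aesara.tensor as at
import numpy as np

from pymc.distributions.shape_utils import change_dist_size

rng = aesara.shared(np.random.default_rng())
rv = at.random.normal(loc=at.as_tensor_variable([1, 2]), rng=rng)

rv_new = change_dist_size(rv, new_size=(3,), expand=True)

# The resized variable carries a fresh RNG input instead of reusing the old one
assert rv_new.owner.inputs[0] is not rng
assert tuple(rv_new.shape.eval()) == (3, 2)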

pymc/distributions/simulator.py

Lines changed: 2 additions & 2 deletions
@@ -248,10 +248,10 @@ def logp(cls, value, sim_op, sim_inputs):
         # TODO: Model rngs should be updated prior to multiprocessing split,
         # in which case this would not be needed. However, that would have to be
         # done for every sampler that may accommodate Simulators
-        rng = aesara.shared(np.random.default_rng())
+        rng = aesara.shared(np.random.default_rng(), name="simulator_rng")
         # Create a new simulatorRV with identical inputs as the original one
         sim_value = sim_op.make_node(rng, *sim_inputs[1:]).default_output()
-        sim_value.name = "sim_value"
+        sim_value.name = "simulator_value"
 
         return sim_op.distance(
             sim_op.epsilon,

pymc/initial_point.py

Lines changed: 3 additions & 13 deletions
@@ -24,7 +24,7 @@
 from aesara.graph.fg import FunctionGraph
 from aesara.tensor.var import TensorVariable
 
-from pymc.aesaraf import compile_pymc, find_rng_nodes, reseed_rngs
+from pymc.aesaraf import compile_pymc, find_rng_nodes, replace_rng_nodes, reseed_rngs
 from pymc.util import get_transformed_name, get_untransformed_name, is_transformed_name
 
 StartDict = Dict[Union[Variable, str], Union[np.ndarray, Variable, str]]

@@ -167,18 +167,8 @@ def make_initial_point_fn(
 
     # Replace original rng shared variables so that we don't mess with them
    # when calling the final seeded function
-    graph = FunctionGraph(outputs=initial_values, clone=False)
-    rng_nodes = find_rng_nodes(graph.outputs)
-    new_rng_nodes: List[Union[np.random.RandomState, np.random.Generator]] = []
-    for rng_node in rng_nodes:
-        rng_cls: type
-        if isinstance(rng_node, at.random.var.RandomStateSharedVariable):
-            rng_cls = np.random.RandomState
-        else:
-            rng_cls = np.random.Generator
-        new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64())))
-    graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True)
-    func = compile_pymc(inputs=[], outputs=graph.outputs, mode=aesara.compile.mode.FAST_COMPILE)
+    initial_values = replace_rng_nodes(initial_values)
+    func = compile_pymc(inputs=[], outputs=initial_values, mode=aesara.compile.mode.FAST_COMPILE)
 
     varnames = []
     for var in model.free_RVs:

pymc/step_methods/metropolis.py

Lines changed: 12 additions & 3 deletions
@@ -23,7 +23,14 @@
 
 import pymc as pm
 
-from pymc.aesaraf import compile_pymc, floatX, rvs_to_value_vars
+from pymc.aesaraf import (
+    CallableTensor,
+    compile_pymc,
+    floatX,
+    join_nonshared_inputs,
+    replace_rng_nodes,
+    rvs_to_value_vars,
+)
 from pymc.blocking import DictToArrayBijection, RaveledVars
 from pymc.step_methods.arraystep import (
     ArrayStep,

@@ -1046,12 +1053,14 @@ def sample_except(limit, excluded):
 
 
 def delta_logp(point, logp, vars, shared):
-    [logp0], inarray0 = pm.join_nonshared_inputs(point, [logp], vars, shared)
+    [logp0], inarray0 = join_nonshared_inputs(point, [logp], vars, shared)
 
     tensor_type = inarray0.type
     inarray1 = tensor_type("inarray1")
 
-    logp1 = pm.CallableTensor(logp0)(inarray1)
+    logp1 = CallableTensor(logp0)(inarray1)
+    # Replace any potential duplicated RNG nodes
+    (logp1,) = replace_rng_nodes((logp1,))
 
     f = compile_pymc([inarray1, inarray0], logp1 - logp0)
     f.trust_input = True
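
Why the extra call matters here: CallableTensor clones the logp0 graph onto inarray1, so logp1 would otherwise share its RNG shared variables with logp0. Passing both to compile_pymc would then produce two distinct update expressions for the same RNG and trigger the ValueError introduced in pymc/aesaraf.py above. Giving logp1 fresh RNGs avoids that; this is the Metropolis-with-Simulator failure described in the commit message.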

pymc/tests/distributions/test_shape_utils.py

Lines changed: 16 additions & 7 deletions
@@ -506,7 +506,8 @@ def test_rv_size_is_none():
 
 def test_change_rv_size():
     loc = at.as_tensor_variable([1, 2])
-    rv = normal(loc=loc)
+    rng = aesara.shared(np.random.default_rng())
+    rv = normal(loc=loc, rng=rng)
     assert rv.ndim == 1
     assert tuple(rv.shape.eval()) == (2,)

@@ -525,6 +526,9 @@ def test_change_rv_size():
     assert loc in rv_new_ancestors
     assert rv not in rv_new_ancestors
 
+    # Check that the old rng is not reused
+    assert rv_new.owner.inputs[0] is not rng
+
     rv_newer = change_dist_size(rv_new, new_size=(4,), expand=True)
     assert rv_newer.ndim == 3
     assert tuple(rv_newer.shape.eval()) == (4, 3, 2)

@@ -555,22 +559,27 @@ def test_change_rv_size_default_update():
     rng = aesara.shared(np.random.default_rng(0))
     x = normal(rng=rng)
 
-    # Test that "traditional" default_update is updated
+    # Test that "traditional" default_update is translated to the new rng
     rng.default_update = x.owner.outputs[0]
     new_x = change_dist_size(x, new_size=(2,))
-    assert rng.default_update is not x.owner.outputs[0]
-    assert rng.default_update is new_x.owner.outputs[0]
+    new_rng = new_x.owner.inputs[0]
+    assert rng.default_update is x.owner.outputs[0]
+    assert new_rng.default_update is new_x.owner.outputs[0]
 
-    # Test that "non-traditional" default_update is left unchanged
+    # Test that "non-traditional" default_update raises UserWarning
     next_rng = aesara.shared(np.random.default_rng(1))
     rng.default_update = next_rng
-    new_x = change_dist_size(x, new_size=(2,))
+    with pytest.warns(UserWarning, match="could not be replicated in resized variable"):
+        new_x = change_dist_size(x, new_size=(2,))
+    new_rng = new_x.owner.inputs[0]
     assert rng.default_update is next_rng
+    assert not hasattr(new_rng, "default_update")
 
     # Test that default_update is not set if there was none before
     del rng.default_update
     new_x = change_dist_size(x, new_size=(2,))
-    assert not hasattr(rng, "default_update")
+    new_rng = new_x.owner.inputs[0]
+    assert not hasattr(new_rng, "default_update")
 
 
 def test_change_specify_shape_size_univariate():

pymc/tests/test_aesaraf.py

Lines changed: 37 additions & 0 deletions
@@ -37,6 +37,7 @@
     compile_pymc,
     convert_observed_data,
     extract_obs_data,
+    replace_rng_nodes,
     reseed_rngs,
     rvs_to_value_vars,
     walk_model,

@@ -502,6 +503,42 @@ def test_random_seed(self):
         assert x3_eval == x2_eval
         assert y3_eval == y2_eval
 
+    def test_multiple_updates_same_variable(self):
+        rng = aesara.shared(np.random.default_rng(), name="rng")
+        x = at.random.normal(rng=rng)
+        y = at.random.normal(rng=rng)
+
+        assert compile_pymc([], [x])
+        assert compile_pymc([], [y])
+        msg = "Multiple update expressions found for the variable rng"
+        with pytest.raises(ValueError, match=msg):
+            compile_pymc([], [x, y])
+
+
+def test_replace_rng_nodes():
+    rng = aesara.shared(np.random.default_rng())
+    x = at.random.normal(rng=rng)
+    x_rng, *x_non_rng_inputs = x.owner.inputs
+
+    cloned_x = x.owner.clone().default_output()
+    cloned_x_rng, *cloned_x_non_rng_inputs = cloned_x.owner.inputs
+
+    # RNG inputs are the same across the two variables
+    assert x_rng is cloned_x_rng
+
+    (new_x,) = replace_rng_nodes([cloned_x])
+    new_x_rng, *new_x_non_rng_inputs = new_x.owner.inputs
+
+    # Variables are still the same
+    assert new_x is cloned_x
+
+    # RNG inputs are not the same as before
+    assert new_x_rng is not x_rng
+
+    # All other inputs are the same as before
+    for non_rng_inputs, new_non_rng_inputs in zip(x_non_rng_inputs, new_x_non_rng_inputs):
+        assert non_rng_inputs is new_non_rng_inputs
+
 
 def test_reseed_rngs():
     # Reseed_rngs uses the `PCG64` bit_generator, which is currently the default