pymc-devs · lucianopaz · Feb 27, 2019 · Feb 25, 2019 · Feb 25, 2019 · Feb 25, 2019
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -5,6 +5,7 @@
 ### New features
 
 - `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions.
+- `GLM.from_formula` and `LinearComponent.from_formula` can now extract variables from the calling scope. This behavior is customizable via the new `eval_env` argument. Fixes #3382.
 
 ### Maintenance
 
@@ -506,4 +507,3 @@ Thus, Thomas, Chris and I are pleased to announce that PyMC3 is now in Beta.
 * maahnman <[email protected]>
 * paul sorenson <[email protected]>
 * zenourn <[email protected]>
-
diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py
@@ -84,9 +84,22 @@ def __init__(self, x, y, intercept=True, labels=None,
 
     @classmethod
     def from_formula(cls, formula, data, priors=None, vars=None,
-                     name='', model=None, offset=0.):
+                     name='', model=None, offset=0., eval_env=0):
+        """Creates linear component from `patsy` formula.
+
+        Parameters
+        ----------
+        formula : str - a patsy formula
+        data : a dict-like object that can be used to look up variables referenced
+            in `formula`
+        eval_env : either a `patsy.EvalEnvironment` or an integer depth passed to
+            `patsy.EvalEnvironment.capture()`; the default, 0, uses the scope of
+            the caller. See `patsy.dmatrix` and `patsy.EvalEnvironment` for details.
+        Other arguments are documented in the constructor.
+        """
         import patsy
-        y, x = patsy.dmatrices(formula, data)
+        eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1)
+        y, x = patsy.dmatrices(formula, data, eval_env=eval_env)
         labels = x.design_info.column_names
         return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
                    labels=labels, priors=priors, vars=vars, name=name,
@@ -140,9 +153,23 @@ def __init__(self, x, y, intercept=True, labels=None,
     @classmethod
     def from_formula(cls, formula, data, priors=None,
                      vars=None, family='normal', name='',
-                     model=None, offset=0.):
+                     model=None, offset=0., eval_env=0):
+        """
+        Creates GLM from formula.
+
+        Parameters
+        ----------
+        formula : str - a `patsy` formula
+        data : a dict-like object that can be used to look up variables referenced
+            in `formula`
+        eval_env : either a `patsy.EvalEnvironment` or an integer depth passed to
+            `patsy.EvalEnvironment.capture()`; the default, 0, uses the scope of
+            the caller. See `patsy.dmatrix` and `patsy.EvalEnvironment` for details.
+        Other arguments are documented in the constructor.
+        """
         import patsy
-        y, x = patsy.dmatrices(formula, data)
+        eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1)
+        y, x = patsy.dmatrices(formula, data, eval_env=eval_env)
         labels = x.design_info.column_names
         return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
                    labels=labels, priors=priors, vars=vars, family=family,

diff --git a/pymc3/tests/test_glm.py b/pymc3/tests/test_glm.py
@@ -2,6 +2,7 @@
 from numpy.testing import assert_equal
 
 from .helpers import SeededTest
+import pymc3
 from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample
 from pymc3 import families, GLM, LinearComponent
 import pandas as pd
@@ -117,3 +118,15 @@ def test_boolean_y(self):
             )
         )
         assert_equal(model.y.observations, model_bool.y.observations)
+
+    def test_glm_formula_from_calling_scope(self):
+        """Formula can extract variables from the calling scope."""
+        z = pd.Series([10, 20, 30])
+        df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})
+        GLM.from_formula("y ~ x + z", df, family=pymc3.glm.families.Binomial())
+
+    def test_linear_component_formula_from_calling_scope(self):
+        """Formula can extract variables from the calling scope."""
+        z = pd.Series([10, 20, 30])
+        df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})
+        LinearComponent.from_formula("y ~ x + z", df)