From 1742d44aaca40dc085457a7bc7444e80ccc79eae Mon Sep 17 00:00:00 2001 From: adamboche Date: Sun, 24 Feb 2019 18:09:27 -0800 Subject: [PATCH 1/4] Allow linear model formula to extract variables from calling scope. --- pymc3/glm/linear.py | 10 ++++++---- pymc3/tests/test_glm.py | 13 +++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index 4781784e1a..c57d93eec0 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -84,9 +84,10 @@ def __init__(self, x, y, intercept=True, labels=None, @classmethod def from_formula(cls, formula, data, priors=None, vars=None, - name='', model=None, offset=0.): + name='', model=None, offset=0., eval_env=0): import patsy - y, x = patsy.dmatrices(formula, data) + eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1) + y, x = patsy.dmatrices(formula, data, eval_env=eval_env) labels = x.design_info.column_names return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False, labels=labels, priors=priors, vars=vars, name=name, @@ -140,9 +141,10 @@ def __init__(self, x, y, intercept=True, labels=None, @classmethod def from_formula(cls, formula, data, priors=None, vars=None, family='normal', name='', - model=None, offset=0.): + model=None, offset=0., eval_env=0): import patsy - y, x = patsy.dmatrices(formula, data) + eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1) + y, x = patsy.dmatrices(formula, data, eval_env=eval_env) labels = x.design_info.column_names return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False, labels=labels, priors=priors, vars=vars, family=family, diff --git a/pymc3/tests/test_glm.py b/pymc3/tests/test_glm.py index da1a8ef611..762ef9151c 100644 --- a/pymc3/tests/test_glm.py +++ b/pymc3/tests/test_glm.py @@ -2,6 +2,7 @@ from numpy.testing import assert_equal from .helpers import SeededTest +import pymc3 from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample from pymc3 import families, GLM, LinearComponent 
import pandas as pd @@ -117,3 +118,15 @@ def test_boolean_y(self): ) ) assert_equal(model.y.observations, model_bool.y.observations) + + def test_glm_formula_from_calling_scope(self): + """Formula can extract variables from the calling scope.""" + z = pd.Series([10, 20, 30]) + df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]}) + GLM.from_formula("y ~ x + z", df, family=pymc3.glm.families.Binomial()) + + def test_linear_component_formula_from_calling_scope(self): + """Formula can extract variables from the calling scope.""" + z = pd.Series([10, 20, 30]) + df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]}) + LinearComponent.from_formula("y ~ x + z", df) From 0b5d6d05b3f6422ada2c3ac3eda0a897089eb69c Mon Sep 17 00:00:00 2001 From: adamboche Date: Sun, 24 Feb 2019 22:12:35 -0800 Subject: [PATCH 2/4] Add docstrings for `GLM.from_formula` and `LinearComponent.from_formula`. --- pymc3/glm/linear.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index c57d93eec0..32e7b49e0b 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -85,6 +85,18 @@ def __init__(self, x, y, intercept=True, labels=None, @classmethod def from_formula(cls, formula, data, priors=None, vars=None, name='', model=None, offset=0., eval_env=0): + """Creates linear component from `patsy` formula. + + Parameters + ---------- + formula : str - a patsy formula + data : a dict-like object that can be used to look up variables referenced + in `formula` + eval_env : either a `patsy.EvalEnvironment` or else a depth represented as + an integer which will be passed to `patsy.EvalEnvironment.capture()`. + See `patsy.dmatrix` and `patsy.EvalEnvironment` for details. + Other arguments are documented in the constructor. 
+ """ import patsy eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1) y, x = patsy.dmatrices(formula, data, eval_env=eval_env) @@ -142,6 +154,15 @@ def __init__(self, x, y, intercept=True, labels=None, def from_formula(cls, formula, data, priors=None, vars=None, family='normal', name='', model=None, offset=0., eval_env=0): + """ + formula : str - a `patsy` formula + data : a dict-like object that can be used to look up variables referenced + in `formula` + eval_env : either a `patsy.EvalEnvironment` or else a depth represented as + an integer which will be passed to `patsy.EvalEnvironment.capture()`. + See `patsy.dmatrix` and `patsy.EvalEnvironment` for details. + Other arguments are documented in the constructor. + """ import patsy eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1) y, x = patsy.dmatrices(formula, data, eval_env=eval_env) From 8ccdf363f3c3cef04c91541a164af1b0339d3c12 Mon Sep 17 00:00:00 2001 From: adamboche Date: Sun, 24 Feb 2019 22:16:13 -0800 Subject: [PATCH 3/4] Clean GLM.from_formula docstring. --- pymc3/glm/linear.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pymc3/glm/linear.py b/pymc3/glm/linear.py index 32e7b49e0b..aa25a51235 100644 --- a/pymc3/glm/linear.py +++ b/pymc3/glm/linear.py @@ -155,6 +155,10 @@ def from_formula(cls, formula, data, priors=None, vars=None, family='normal', name='', model=None, offset=0., eval_env=0): """ + Creates GLM from formula. + + Parameters + ---------- formula : str - a `patsy` formula data : a dict-like object that can be used to look up variables referenced in `formula` From e7916de3f88d8262d043754dc07a845a29954bbf Mon Sep 17 00:00:00 2001 From: adamboche Date: Mon, 25 Feb 2019 12:24:34 -0800 Subject: [PATCH 4/4] Add from_formula eval_env change to release notes. 
--- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 73cb1dfefb..068f9c2b52 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -5,6 +5,7 @@ ### New features - `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions. +- `GLM.from_formula` and `LinearComponent.from_formula` can now extract variables from the calling scope; this is customizable via the new `eval_env` argument. Fixes #3382. ### Maintenance @@ -506,4 +507,3 @@ Thus, Thomas, Chris and I are pleased to announce that PyMC3 is now in Beta. * maahnman * paul sorenson * zenourn -