Skip to content

Allow linear model formula to extract variables from calling scope. #3382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### New features

- `Mixture` now supports mixtures of multidimensional probability distributions, not just lists of 1D distributions.
- `GLM.from_formula` and `LinearComponent.from_formula` can now extract variables from the calling scope. This behavior is customizable via the new `eval_env` argument. Fixes #3382.

### Maintenance

Expand Down Expand Up @@ -506,4 +507,3 @@ Thus, Thomas, Chris and I are pleased to announce that PyMC3 is now in Beta.
* maahnman <[email protected]>
* paul sorenson <[email protected]>
* zenourn <[email protected]>

35 changes: 31 additions & 4 deletions pymc3/glm/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,22 @@ def __init__(self, x, y, intercept=True, labels=None,

@classmethod
def from_formula(cls, formula, data, priors=None, vars=None,
name='', model=None, offset=0.):
name='', model=None, offset=0., eval_env=0):
"""Creates linear component from `patsy` formula.

Parameters
----------
formula : str - a `patsy` formula
data : a dict-like object that can be used to look up variables referenced
in `formula`
eval_env : either a `patsy.EvalEnvironment` or else a depth represented as
an integer which will be passed to `patsy.EvalEnvironment.capture()`.
See `patsy.dmatrix` and `patsy.EvalEnvironment` for details.
Other arguments are documented in the constructor.
"""
import patsy
y, x = patsy.dmatrices(formula, data)
eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1)
y, x = patsy.dmatrices(formula, data, eval_env=eval_env)
labels = x.design_info.column_names
return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
labels=labels, priors=priors, vars=vars, name=name,
Expand Down Expand Up @@ -140,9 +153,23 @@ def __init__(self, x, y, intercept=True, labels=None,
@classmethod
def from_formula(cls, formula, data, priors=None,
vars=None, family='normal', name='',
model=None, offset=0.):
model=None, offset=0., eval_env=0):
"""
Creates GLM from formula.

Parameters
----------
formula : str - a `patsy` formula
data : a dict-like object that can be used to look up variables referenced
in `formula`
eval_env : either a `patsy.EvalEnvironment` or else a depth represented as
an integer which will be passed to `patsy.EvalEnvironment.capture()`.
See `patsy.dmatrix` and `patsy.EvalEnvironment` for details.
Other arguments are documented in the constructor.
"""
import patsy
y, x = patsy.dmatrices(formula, data)
eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1)
y, x = patsy.dmatrices(formula, data, eval_env=eval_env)
labels = x.design_info.column_names
return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
labels=labels, priors=priors, vars=vars, family=family,
Expand Down
13 changes: 13 additions & 0 deletions pymc3/tests/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from numpy.testing import assert_equal

from .helpers import SeededTest
import pymc3
from pymc3 import Model, Uniform, Normal, find_MAP, Slice, sample
from pymc3 import families, GLM, LinearComponent
import pandas as pd
Expand Down Expand Up @@ -117,3 +118,15 @@ def test_boolean_y(self):
)
)
assert_equal(model.y.observations, model_bool.y.observations)

def test_glm_formula_from_calling_scope(self):
"""Formula can extract variables from the calling scope."""
z = pd.Series([10, 20, 30])
df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})
GLM.from_formula("y ~ x + z", df, family=pymc3.glm.families.Binomial())

def test_linear_component_formula_from_calling_scope(self):
"""Formula can extract variables from the calling scope."""
z = pd.Series([10, 20, 30])
df = pd.DataFrame({"y": [0, 1, 0], "x": [1.0, 2.0, 3.0]})
LinearComponent.from_formula("y ~ x + z", df)