From 5852a5b90e1e63498223bd8c6a2bd1e3cbe58545 Mon Sep 17 00:00:00 2001
From: Luciano Paz
Date: Mon, 25 Feb 2019 12:41:13 +0100
Subject: [PATCH 1/9] Changed Categorical to work with multidim p at the logp level.

---
 RELEASE-NOTES.md                          |  3 +++
 pymc3/distributions/discrete.py           | 33 ++++++++++++++++++-----
 pymc3/tests/test_distribution_defaults.py |  9 ++++++-
 pymc3/tests/test_distributions.py         | 18 ++++++++++++-
 pymc3/tests/test_distributions_random.py  |  3 +++
 5 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 73cb1dfefb..a6624c34e5 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -23,6 +23,9 @@
 - Fixed incorrect usage of `broadcast_distribution_samples` in `DiscreteWeibull`.
 - `Mixture`'s default dtype is now determined by `theano.config.floatX`.
 - `dist_math.random_choice` now handles nd-arrays of category probabilities, and also handles sizes that are not `None`. Also removed unused `k` kwarg from `dist_math.random_choice`.
+- Changed `Categorical.mode` to preserve all the dimensions of `p` except the last one, which encodes each category's probability.
+- Changed initialization of `Categorical.p`. `p` is now normalized to sum to `1` inside `logp` and `random`, but not during initialization. This could hide negative values supplied to `p` as mentioned in #2082.
+- To be able to test for negative `p` values supplied to `Categorical`, `Categorical.logp` was changed to check for `sum(self.p, axis=-1) == 1` only if `self.p` is not a `Number`, `np.ndarray`, `TensorConstant` or `SharedVariable`. These cases are automatically normalized to sum to `1`. The remaining case may originate from a `step_method` proposal, where the `self.p` tensor's value may be set, but must nevertheless sum to 1. This may break old code which initialized `p` with a theano expression and relied on the default normalization to get it to sum to 1. `Categorical.logp` now also checks that the used `p` has no values greater than 1.

 ### Deprecations

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index e74293b068..4047fab7a2 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1,3 +1,4 @@
+import numbers
 import numpy as np
 import theano
 import theano.tensor as tt
@@ -710,11 +711,15 @@ def __init__(self, p, *args, **kwargs):
         except AttributeError:
             self.k = tt.shape(p)[-1]
         p = tt.as_tensor_variable(floatX(p))
-        self.p = (p.T / tt.sum(p, -1)).T
-        self.mode = tt.argmax(p)
+
+        # From #2082, it may be dangerous to automatically rescale p at this
+        # point without checking for positivity
+        self.p = p
+        self.mode = tt.argmax(p, axis=-1)

     def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
+        p = p / np.sum(p, axis=-1, keepdims=True)
         return generate_samples(random_choice,
                                 p=p,
@@ -723,21 +728,35 @@ def random(self, point=None, size=None):
                                 size=size)

     def logp(self, value):
-        p = self.p
+        p_ = self.p
         k = self.k

         # Clip values before using them for indexing
         value_clip = tt.clip(value, 0, k - 1)

-        sumto1 = theano.gradient.zero_grad(
-            tt.le(abs(tt.sum(p, axis=-1) - 1), 1e-5))
+        # We must only check that the values sum to 1 if p comes from a
+        # tensor variable, i.e. when p is a step_method proposal. In the other
+        # cases we normalize ourselves
+        if not isinstance(p_, (numbers.Number,
+                               np.ndarray,
+                               tt.TensorConstant,
+                               tt.sharedvar.SharedVariable)):
+            sumto1 = theano.gradient.zero_grad(
+                tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
+            p = p_
+        else:
+            p = p_ / tt.sum(p_, axis=-1, keepdims=True)
+            sumto1 = True
+
         if p.ndim > 1:
-            a = tt.log(p[tt.arange(p.shape[0]), value_clip])
+            # Fancy trick to index the last axis without touching the others
+            a = tt.log((p.T)[value_clip].T)
         else:
             a = tt.log(p[value_clip])
-        return bound(a, value >= 0, value <= (k - 1), sumto1)
+
+        return bound(a, value >= 0, value <= (k - 1), sumto1,
+                     tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
diff --git a/pymc3/tests/test_distribution_defaults.py b/pymc3/tests/test_distribution_defaults.py
index 510d3998cd..9fb8b69c76 100644
--- a/pymc3/tests/test_distribution_defaults.py
+++ b/pymc3/tests/test_distribution_defaults.py
@@ -1,5 +1,5 @@
 from ..model import Model
-from ..distributions import DiscreteUniform, Continuous
+from ..distributions import DiscreteUniform, Continuous, Categorical
 import numpy as np
 import pytest
@@ -67,3 +67,10 @@ def test_discrete_uniform_negative():
     with model:
         x = DiscreteUniform('x', lower=-10, upper=0)
     assert model.test_point['x'] == -5
+
+
+def test_categorical_mode():
+    model = Model()
+    with model:
+        x = Categorical('x', p=np.eye(4), shape=4)
+    assert np.allclose(model.test_point['x'], np.arange(4))
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index e10a34c6d5..bed96b0448 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -321,7 +321,7 @@ def dirichlet_logpdf(value, a):

 def categorical_logpdf(value, p):
     if value >= 0 and value <= len(p):
-        return floatX(np.log(p[value]))
+        return floatX(np.log((p.T)[value]).T)
     else:
         return -inf

@@ -1079,6 +1079,22 @@ def test_categorical_bounds(self):
             assert np.isinf(x.logp({'x': -1}))
             assert np.isinf(x.logp({'x': 3}))

+    def test_categorical_valid_p(self):
+        with Model():
+            x = Categorical('x', p=np.array([-0.2, 0.3, 0.5]))
+            assert np.isinf(x.logp({'x': 0}))
+            assert np.isinf(x.logp({'x': 1}))
+        with Model():
+            # Hard edge case from #2082
+            # Early automatic normalization of p's sum would hide the negative
+            # entries if there is a single negative value or a pair of
+            # negative values and the rest are zero
+            x = Categorical('x', p=np.array([-1, -1, 0, 0]))
+            assert np.isinf(x.logp({'x': 0}))
+            assert np.isinf(x.logp({'x': 1}))
+            assert np.isinf(x.logp({'x': 2}))
+            assert np.isinf(x.logp({'x': 3}))
+
     @pytest.mark.parametrize('n', [2, 3, 4])
     def test_categorical(self, n):
         self.pymc3_matches_scipy(Categorical, Domain(range(n), 'int64'), {'p': Simplex(n)},
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 26876ee04c..6fa49ca843 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -444,6 +444,9 @@ def test_probability_vector_shape(self):
         p = np.ones((10, 5))
         assert pm.Categorical.dist(p=p).random().shape == (10,)
         assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 10)
+        p = np.ones((3, 7, 5))
+        assert pm.Categorical.dist(p=p).random().shape == (3, 7)
+        assert pm.Categorical.dist(p=p).random(size=4).shape == (4, 3, 7)


 class TestScalarParameterSamples(SeededTest):

From ec7714138a3398473af91e94a4a7b7aceaf08de0 Mon Sep 17 00:00:00 2001
From: Luciano Paz
Date: Mon, 25 Feb 2019 15:49:18 +0100
Subject: [PATCH 2/9] Fixed problems with OrderedLogistic.

---
 pymc3/distributions/discrete.py   | 7 ++++---
 pymc3/tests/test_distributions.py | 6 ++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 4047fab7a2..4ec67a01ae 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -716,6 +716,8 @@ def __init__(self, p, *args, **kwargs):
         # point without checking for positivity
         self.p = p
         self.mode = tt.argmax(p, axis=-1)
+        if self.mode.ndim == 1:
+            self.mode = tt.squeeze(self.mode)

     def random(self, point=None, size=None):
         p, k = draw_values([self.p, self.k], point=point, size=size)
@@ -748,7 +750,6 @@ def logp(self, value):
             p = p_ / tt.sum(p_, axis=-1, keepdims=True)
             sumto1 = True
-
         if p.ndim > 1:
             # Fancy trick to index the last axis without touching the others
             a = tt.log((p.T)[value_clip].T)
@@ -756,7 +757,7 @@ def logp(self, value):
             a = tt.log(p[value_clip])

         return bound(a, value >= 0, value <= (k - 1), sumto1,
-                     tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
+                     tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
@@ -1196,7 +1197,7 @@ def __init__(self, eta, cutpoints, *args, **kwargs):
             tt.zeros_like(tt.shape_padright(pa[:, 0])),
             pa,
             tt.ones_like(tt.shape_padright(pa[:, 0]))
-        ], axis=1)
+        ], axis=-1)
         p = p_cum[:, 1:] - p_cum[:, :-1]

         super().__init__(p=p, *args, **kwargs)
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index bed96b0448..8890e23799 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -346,8 +346,10 @@ def invlogit(x, eps=sys.float_info.epsilon):

 def orderedlogistic_logpdf(value, eta, cutpoints):
     c = np.concatenate(([-np.inf], cutpoints, [np.inf]))
-    p = invlogit(eta - c[value]) - invlogit(eta - c[value + 1])
-    return np.log(p)
+    ps = np.array([invlogit(eta - cc) - invlogit(eta - cc1)
+                   for cc, cc1 in zip(c[:-1], c[1:])])
+    p = ps[value]
+    return np.where(np.all(ps > 0), np.log(p), -np.inf)


 class Simplex:
     def __init__(self, n):

From 0369ffac9beeb12e06b312e4c5eab95650e4c9f2 Mon Sep 17 00:00:00 2001
From: lucianopaz
Date: Mon, 25 Feb 2019 21:19:20 +0100
Subject: [PATCH 3/9] Use np.moveaxis instead of transposing. Also added some more tests.
---
 pymc3/distributions/discrete.py   | 3 +--
 pymc3/tests/test_distributions.py | 9 ++++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 4ec67a01ae..952f987773 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -751,8 +751,7 @@ def logp(self, value):
             sumto1 = True
         if p.ndim > 1:
-            # Fancy trick to index the last axis without touching the others
-            a = tt.log((p.T)[value_clip].T)
+            a = tt.log(np.moveaxis(p, -1, 0)[value_clip])
         else:
             a = tt.log(p[value_clip])

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 8890e23799..05eb14ea82 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -321,7 +321,7 @@ def dirichlet_logpdf(value, a):

 def categorical_logpdf(value, p):
     if value >= 0 and value <= len(p):
-        return floatX(np.log((p.T)[value]).T)
+        return floatX(np.log(np.moveaxis(p, -1, 0)[value]))
     else:
         return -inf

@@ -1086,6 +1086,13 @@ def test_categorical_valid_p(self):
             x = Categorical('x', p=np.array([-0.2, 0.3, 0.5]))
             assert np.isinf(x.logp({'x': 0}))
             assert np.isinf(x.logp({'x': 1}))
+            assert np.isinf(x.logp({'x': 2}))
+        with Model():
+            # A model where p sums to 1 but contains negative values
+            x = Categorical('x', p=np.array([-0.2, 0.7, 0.5]))
+            assert np.isinf(x.logp({'x': 0}))
+            assert np.isinf(x.logp({'x': 1}))
+            assert np.isinf(x.logp({'x': 2}))
         with Model():
             # Hard edge case from #2082
             # Early automatic normalization of p's sum would hide the negative

From 7e899c6cdb3a3c6d84910c85e195f826db958342 Mon Sep 17 00:00:00 2001
From: lucianopaz
Date: Tue, 26 Feb 2019 21:53:37 +0100
Subject: [PATCH 4/9] Changed np.moveaxis to dimshuffle. Removed isinstance conditions that were impossible.

---
 pymc3/distributions/discrete.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 952f987773..e3f8bd33cb 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -739,9 +739,7 @@ def logp(self, value):
         # We must only check that the values sum to 1 if p comes from a
         # tensor variable, i.e. when p is a step_method proposal. In the other
         # cases we normalize ourselves
-        if not isinstance(p_, (numbers.Number,
-                               np.ndarray,
-                               tt.TensorConstant,
+        if not isinstance(p_, (tt.TensorConstant,
                                tt.sharedvar.SharedVariable)):
             sumto1 = theano.gradient.zero_grad(
                 tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
             p = p_
@@ -751,7 +749,8 @@ def logp(self, value):
             sumto1 = True
         if p.ndim > 1:
-            a = tt.log(np.moveaxis(p, -1, 0)[value_clip])
+            pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
+            a = tt.log(p.dimshuffle(pattern)[value_clip])
         else:
             a = tt.log(p[value_clip])

From beb9cef99ee04a3d3cd15e9b31d48d8740512a61 Mon Sep 17 00:00:00 2001
From: lucianopaz
Date: Wed, 27 Feb 2019 07:43:42 +0100
Subject: [PATCH 5/9] Fixed lint error

---
 pymc3/distributions/discrete.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index e3f8bd33cb..1a1dddf9fa 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1,4 +1,3 @@
-import numbers
 import numpy as np
 import theano
 import theano.tensor as tt

From 8e03412b3524d31716086ff2ffc7bf8dbdca875c Mon Sep 17 00:00:00 2001
From: Luciano Paz
Date: Wed, 27 Feb 2019 14:51:01 +0100
Subject: [PATCH 6/9] Force a normalized p in logp

---
 pymc3/distributions/discrete.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 1a1dddf9fa..7d940bf295 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -735,17 +735,7 @@ def logp(self, value):
         # Clip values before using them for indexing
         value_clip = tt.clip(value, 0, k - 1)

-        # We must only check that the values sum to 1 if p comes from a
-        # tensor variable, i.e. when p is a step_method proposal. In the other
-        # cases we normalize ourselves
-        if not isinstance(p_, (tt.TensorConstant,
-                               tt.sharedvar.SharedVariable)):
-            sumto1 = theano.gradient.zero_grad(
-                tt.le(abs(tt.sum(p_, axis=-1) - 1), 1e-5))
-            p = p_
-        else:
-            p = p_ / tt.sum(p_, axis=-1, keepdims=True)
-            sumto1 = True
+        p = p_ / tt.sum(p_, axis=-1, keepdims=True)
         if p.ndim > 1:
             pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
@@ -753,7 +743,7 @@ def logp(self, value):
             a = tt.log(p[value_clip])

-        return bound(a, value >= 0, value <= (k - 1), sumto1,
+        return bound(a, value >= 0, value <= (k - 1),
                      tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:

From 2dfdd70626342d2c13282219639ef350712c3207 Mon Sep 17 00:00:00 2001
From: Luciano Paz
Date: Thu, 28 Feb 2019 09:37:58 +0100
Subject: [PATCH 7/9] Fixed lint error

---
 pymc3/distributions/discrete.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
index 7d940bf295..1dbe399363 100644
--- a/pymc3/distributions/discrete.py
+++ b/pymc3/distributions/discrete.py
@@ -1,5 +1,4 @@
 import numpy as np
-import theano
 import theano.tensor as tt
 from scipy import stats
 import warnings
@@ -744,7 +743,7 @@ def logp(self, value):
             a = tt.log(p[value_clip])

         return bound(a, value >= 0, value <= (k - 1),
-                     tt.all(p_ > 0, axis=-1), tt.all(p <= 1, axis=-1))
+                     tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:

From 1953fdeebfd09b711d8a6843b410d630408e49b1 Mon Sep 17 00:00:00 2001
From: Luciano Paz
Date: Thu, 28 Feb 2019 09:56:34 +0100
Subject: [PATCH 8/9] Edited release notes. Categorical.p can have zeros, and the normalization of p is always carried out

---
 RELEASE-NOTES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index d8bd2239ce..4806c7aa94 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -26,7 +26,7 @@
 - `dist_math.random_choice` now handles nd-arrays of category probabilities, and also handles sizes that are not `None`. Also removed unused `k` kwarg from `dist_math.random_choice`.
 - Changed `Categorical.mode` to preserve all the dimensions of `p` except the last one, which encodes each category's probability.
 - Changed initialization of `Categorical.p`. `p` is now normalized to sum to `1` inside `logp` and `random`, but not during initialization. This could hide negative values supplied to `p` as mentioned in #2082.
-- To be able to test for negative `p` values supplied to `Categorical`, `Categorical.logp` was changed to check for `sum(self.p, axis=-1) == 1` only if `self.p` is not a `Number`, `np.ndarray`, `TensorConstant` or `SharedVariable`. These cases are automatically normalized to sum to `1`. The remaining case may originate from a `step_method` proposal, where the `self.p` tensor's value may be set, but must nevertheless sum to 1. This may break old code which initialized `p` with a theano expression and relied on the default normalization to get it to sum to 1. `Categorical.logp` now also checks that the used `p` has no values greater than 1.
+- `Categorical` now accepts elements of `p` equal to `0`. `logp` will return `-inf` if there are `values` that index to the zero-probability categories.

 ### Deprecations

From 88c9e725fb4bd578595a1e9bb2ba52744f8deec4 Mon Sep 17 00:00:00 2001
From: Luciano Paz
Date: Thu, 28 Feb 2019 10:28:26 +0100
Subject: [PATCH 9/9] Fixed orderedlogistic test logpdf to check for ps>=0 and not just ps>0

---
 pymc3/tests/test_distributions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index 05eb14ea82..dbb02d36d8 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -349,7 +349,7 @@ def orderedlogistic_logpdf(value, eta, cutpoints):
     ps = np.array([invlogit(eta - cc) - invlogit(eta - cc1)
                    for cc, cc1 in zip(c[:-1], c[1:])])
     p = ps[value]
-    return np.where(np.all(ps > 0), np.log(p), -np.inf)
+    return np.where(np.all(ps >= 0), np.log(p), -np.inf)


 class Simplex:
     def __init__(self, n):
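
================================================================
Illustrative sketches (not part of the patch series)
================================================================

The last-axis indexing that patches 3 and 4 converge on reads naturally in
plain NumPy. A minimal sketch, with toy shapes of our own choosing: for an
integer `value`, `np.moveaxis(p, -1, 0)[value]` (the form used in
`categorical_logpdf`) picks that category's probability for every batch
entry, however many leading batch dimensions `p` carries.

    import numpy as np

    # A (3, 7) batch of probability vectors over 5 categories.
    p = np.random.dirichlet(np.ones(5), size=(3, 7))  # shape (3, 7, 5)
    value = 2                                         # a single category index

    # Move the category (last) axis to the front, then index it...
    picked = np.moveaxis(p, -1, 0)[value]             # shape (3, 7)

    # ...which is the same as indexing the last axis directly.
    assert np.allclose(picked, p[..., value])

The `p.dimshuffle((p.ndim - 1,) + tuple(range(p.ndim - 1)))` call introduced
in patch 4 performs the same "category axis to the front" move as a native
Theano op.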
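The shape contract pinned down by the new `test_probability_vector_shape`
assertions: every dimension of `p` except the last is a batch dimension, and
`random` preserves all of them. A usage sketch, assuming a PyMC3 checkout at
the state of this series:

    import numpy as np
    import pymc3 as pm

    # Unnormalized probabilities are accepted; `random` and `logp`
    # normalize along the last axis internally.
    p = np.ones((3, 7, 5))            # a (3, 7) batch over 5 categories
    dist = pm.Categorical.dist(p=p)

    assert dist.random().shape == (3, 7)
    assert dist.random(size=4).shape == (4, 3, 7)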
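Finally, the #2082 edge case that motivated deferring normalization,
restated as a sketch (values taken from `test_categorical_valid_p`, same
PyMC3 assumption as above): normalizing p = [-1, -1, 0, 0] at initialization
would silently turn it into [0.5, 0.5, 0, 0], whereas the checks now in
`bound` make `logp` return `-inf` for every category.

    import numpy as np
    import pymc3 as pm

    with pm.Model():
        x = pm.Categorical('x', p=np.array([-1, -1, 0, 0]))
        # Negative entries invalidate p, so no value has a finite
        # log-probability.
        for value in range(4):
            assert np.isinf(x.logp({'x': value}))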