
Commit 8fece62

Merge pull request #3038 from junpenglao/use_theano_ops
Use theano Ops available in the newest release
2 parents 1e5867f + f38ad05 commit 8fece62

File tree

4 files changed: +13 -227 lines changed

  pymc3/distributions/dist_math.py
  pymc3/distributions/distribution.py
  pymc3/distributions/multivariate.py
  pymc3/gp/gp.py

pymc3/distributions/dist_math.py

+2 -216
@@ -10,12 +10,11 @@
 import theano.tensor as tt
 import theano
 from theano.scalar import UnaryScalarOp, upgrade_to_float
+from theano.tensor.slinalg import Cholesky

 from .special import gammaln
 from pymc3.theanof import floatX

-from six.moves import xrange
-from functools import partial

 f = floatX
 c = - .5 * np.log(2. * np.pi)
@@ -166,7 +165,7 @@ def MvNormalLogp():

     solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
     solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
-    cholesky = Cholesky(nofail=True, lower=True)
+    cholesky = Cholesky(lower=True, on_error='nan')

     n, k = delta.shape
     n, k = f(n), f(k)
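The replacement line above relies on the on_error keyword that the upstream theano.tensor.slinalg.Cholesky Op accepts, which covers what the local copy's nofail=True used to do: return nan instead of raising when the input is not positive definite. A minimal sketch of that behaviour, assuming a Theano release new enough to ship the keyword:

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.slinalg import Cholesky

# Same configuration as in MvNormalLogp above.
cholesky_nan = Cholesky(lower=True, on_error='nan')
cov = tt.dmatrix('cov')
chol_fn = theano.function([cov], cholesky_nan(cov))

print(chol_fn(np.array([[2., 1.], [1., 2.]])))  # positive definite: a valid lower factor
print(chol_fn(np.array([[1., 2.], [2., 1.]])))  # not positive definite: nan entries, no exception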
@@ -213,88 +212,6 @@ def dlogp(inputs, gradients):
         [cov, delta], [logp], grad_overrides=dlogp, inline=True)


-class Cholesky(theano.Op):
-    """
-    Return a triangular matrix square root of positive semi-definite `x`.
-
-    This is a copy of the cholesky op in theano, that doesn't throw an
-    error if the matrix is not positive definite, but instead returns
-    nan.
-
-    This has been merged upstream and we should switch to that
-    version after the next theano release.
-
-    L = cholesky(X, lower=True) implies dot(L, L.T) == X.
-    """
-    __props__ = ('lower', 'destructive', 'nofail')
-
-    def __init__(self, lower=True, nofail=False):
-        self.lower = lower
-        self.destructive = False
-        self.nofail = nofail
-
-    def make_node(self, x):
-        x = tt.as_tensor_variable(x)
-        if x.ndim != 2:
-            raise ValueError('Matrix must me two dimensional.')
-        return tt.Apply(self, [x], [x.type()])
-
-    def perform(self, node, inputs, outputs):
-        x = inputs[0]
-        z = outputs[0]
-        try:
-            z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
-        except (ValueError, scipy.linalg.LinAlgError):
-            if self.nofail:
-                z[0] = np.eye(x.shape[-1])
-                z[0][0, 0] = np.nan
-            else:
-                raise
-
-    def grad(self, inputs, gradients):
-        """
-        Cholesky decomposition reverse-mode gradient update.
-
-        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_
-
-        References
-        ----------
-        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
-           http://arxiv.org/abs/1602.07527
-
-        """
-
-        x = inputs[0]
-        dz = gradients[0]
-        chol_x = self(x)
-        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
-        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
-        dz = tt.switch(ok, dz, floatX(1))
-
-        # deal with upper triangular by converting to lower triangular
-        if not self.lower:
-            chol_x = chol_x.T
-            dz = dz.T
-
-        def tril_and_halve_diagonal(mtx):
-            """Extracts lower triangle of square matrix and halves diagonal."""
-            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)
-
-        def conjugate_solve_triangular(outer, inner):
-            """Computes L^{-T} P L^{-1} for lower-triangular L."""
-            solve = tt.slinalg.Solve(A_structure="upper_triangular")
-            return solve(outer.T, solve(outer.T, inner.T).T)
-
-        s = conjugate_solve_triangular(
-            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))
-
-        if self.lower:
-            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
-        else:
-            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
-        return [tt.switch(ok, grad, floatX(np.nan))]
-
-
 class SplineWrapper(theano.Op):
     """
     Creates a theano operation from scipy.interpolate.UnivariateSpline
@@ -332,137 +249,6 @@ def grad(self, inputs, grads):
         return [x_grad * self.grad_op(x)]


-# Custom Eigh, EighGrad, and eigh are required until
-# https://github.com/Theano/Theano/pull/6557 is handled, since lambda's
-# cannot be used with pickling.
-class Eigh(tt.nlinalg.Eig):
-    """
-    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
-
-    This is a copy of Eigh from theano that calls an EighGrad which uses
-    partial instead of lambda. Once this has been merged with theano this
-    should be removed.
-    """
-
-    _numop = staticmethod(np.linalg.eigh)
-    __props__ = ('UPLO',)
-
-    def __init__(self, UPLO='L'):
-        assert UPLO in ['L', 'U']
-        self.UPLO = UPLO
-
-    def make_node(self, x):
-        x = tt.as_tensor_variable(x)
-        assert x.ndim == 2
-        # Numpy's linalg.eigh may return either double or single
-        # presision eigenvalues depending on installed version of
-        # LAPACK. Rather than trying to reproduce the (rather
-        # involved) logic, we just probe linalg.eigh with a trivial
-        # input.
-        w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name
-        w = theano.tensor.vector(dtype=w_dtype)
-        v = theano.tensor.matrix(dtype=x.dtype)
-        return theano.gof.Apply(self, [x], [w, v])
-
-    def perform(self, node, inputs, outputs):
-        (x,) = inputs
-        (w, v) = outputs
-        w[0], v[0] = self._numop(x, self.UPLO)
-
-    def grad(self, inputs, g_outputs):
-        r"""The gradient function should return
-        .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
-                           {\partial a_{ij}} +
-                  \sum_k V_{nk}\frac{\partial\,v_{nk}}
-                           {\partial a_{ij}}\right),
-        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
-        :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
-        Analytic formulae for eigensystem gradients are well-known in
-        perturbation theory:
-        .. math:: \frac{\partial\,w_n}
-                       {\partial a_{ij}} = v_{in}\,v_{jn}
-        .. math:: \frac{\partial\,v_{kn}}
-                       {\partial a_{ij}} =
-                  \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
-        """
-        x, = inputs
-        w, v = self(x)
-        # Replace gradients wrt disconnected variables with
-        # zeros. This is a work-around for issue #1063.
-        gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs)
-        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
-
-
-class EighGrad(theano.Op):
-    """
-    Gradient of an eigensystem of a Hermitian matrix.
-
-    This is a copy of EighGrad from theano that uses partial instead of lambda.
-    Once this has been merged with theano this should be removed.
-    """
-
-    __props__ = ('UPLO',)
-
-    def __init__(self, UPLO='L'):
-        assert UPLO in ['L', 'U']
-        self.UPLO = UPLO
-        if UPLO == 'L':
-            self.tri0 = np.tril
-            self.tri1 = partial(np.triu, k=1)
-        else:
-            self.tri0 = np.triu
-            self.tri1 = partial(np.tril, k=-1)
-
-    def make_node(self, x, w, v, gw, gv):
-        x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv))
-        assert x.ndim == 2
-        assert w.ndim == 1
-        assert v.ndim == 2
-        assert gw.ndim == 1
-        assert gv.ndim == 2
-        out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
-                                         gw.dtype, gv.dtype)
-        out = theano.tensor.matrix(dtype=out_dtype)
-        return theano.gof.Apply(self, [x, w, v, gw, gv], [out])
-
-    def perform(self, node, inputs, outputs):
-        """
-        Implements the "reverse-mode" gradient for the eigensystem of
-        a square matrix.
-        """
-        x, w, v, W, V = inputs
-        N = x.shape[0]
-        outer = np.outer
-
-        def G(n):
-            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
-                       for m in xrange(N) if m != n)
-
-        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
-                for n in xrange(N))
-
-        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
-        # (triu(a)) only. This means that partial derivative of
-        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
-        # for i < j (i > j). At the same time, non-zero components of
-        # the gradient must account for the fact that variation of the
-        # opposite triangle contributes to variation of two elements
-        # of Hermitian (symmetric) matrix. The following line
-        # implements the necessary logic.
-        out = self.tri0(g) + self.tri1(g).T
-
-        # Make sure we return the right dtype even if NumPy performed
-        # upcasting in self.tri0.
-        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)
-
-    def infer_shape(self, node, shapes):
-        return [shapes[0]]
-
-
-def eigh(a, UPLO='L'):
-    """A copy, remove with Eigh and EighGrad when possible"""
-    return Eigh(UPLO)(a)
-

 class I0e(UnaryScalarOp):
     """

pymc3/distributions/distribution.py

+1 -1
@@ -10,7 +10,7 @@
 from ..vartypes import string_types

 __all__ = ['DensityDist', 'Distribution', 'Continuous', 'Discrete',
-           'NoDistribution', 'TensorType', 'draw_values']
+           'NoDistribution', 'TensorType', 'draw_values', 'generate_samples']


 class _Unpickling(object):
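This hunk only widens the public surface of the module: generate_samples is now exported next to draw_values. A sketch of the import that this makes official, using the module path shown in this diff:

from pymc3.distributions.distribution import draw_values, generate_samples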

pymc3/distributions/multivariate.py

+9 -9
@@ -9,8 +9,8 @@

 from scipy import stats, linalg

-from theano.tensor.nlinalg import det, matrix_inverse, trace
-import theano.tensor.slinalg
+from theano.tensor.nlinalg import det, matrix_inverse, trace, eigh
+from theano.tensor.slinalg import Cholesky
 import pymc3 as pm

 from pymc3.theanof import floatX
@@ -20,7 +20,7 @@
 from ..model import Deterministic
 from .continuous import ChiSquared, Normal
 from .special import gammaln, multigammaln
-from .dist_math import bound, logpow, factln, Cholesky, eigh
+from .dist_math import bound, logpow, factln
 from ..math import kron_dot, kron_diag, kron_solve_lower, kronecker

@@ -49,7 +49,7 @@ def __init__(self, mu=None, cov=None, chol=None, tau=None, lower=True,
         # moment. We work around that by using a cholesky op
         # that returns a nan as first entry instead of raising
         # an error.
-        cholesky = Cholesky(nofail=True, lower=True)
+        cholesky = Cholesky(lower=True, on_error='nan')

         if cov is not None:
             self.k = cov.shape[0]
@@ -605,9 +605,9 @@ def _repr_latex_(self, name=None, dist=None):

 def posdef(AA):
     try:
-        np.linalg.cholesky(AA)
+        linalg.cholesky(AA)
         return 1
-    except np.linalg.LinAlgError:
+    except linalg.LinAlgError:
         return 0

@@ -796,7 +796,7 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv
     if testval is not None:
         # Inverse transform
         testval = np.dot(np.dot(np.linalg.inv(L), testval), np.linalg.inv(L.T))
-        testval = scipy.linalg.cholesky(testval, lower=True)
+        testval = linalg.cholesky(testval, lower=True)
         diag_testval = testval[diag_idx]**2
         tril_testval = testval[tril_idx]
     else:
@@ -1226,7 +1226,7 @@ def __init__(self, mu=0, rowcov=None, rowchol=None, rowtau=None,
         self.solve_upper = tt.slinalg.solve_upper_triangular

     def _setup_matrices(self, colcov, colchol, coltau, rowcov, rowchol, rowtau):
-        cholesky = Cholesky(nofail=False, lower=True)
+        cholesky = Cholesky(lower=True, on_error='raise')

         # Among-row matrices
         if len([i for i in [rowtau, rowcov, rowchol] if i is not None]) != 1:
@@ -1439,7 +1439,7 @@ def __init__(self, mu, covs=None, chols=None, evds=None, sigma=None,
         self.mean = self.median = self.mode = self.mu

     def _setup(self, covs, chols, evds, sigma):
-        self.cholesky = Cholesky(nofail=False, lower=True)
+        self.cholesky = Cholesky(lower=True, on_error='raise')
         if len([i for i in [covs, chols, evds] if i is not None]) != 1:
             raise ValueError('Incompatible parameterization. '
                              'Specify exactly one of covs, chols, '
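Several call sites in this file now go through scipy's linalg namespace (imported at the top of multivariate.py as from scipy import stats, linalg) instead of np.linalg or a bare scipy.linalg reference. For instance, the posdef helper after this change, extracted as a standalone sketch:

import numpy as np
from scipy import linalg

def posdef(AA):
    # A Cholesky factorization succeeds exactly when AA is
    # (numerically) positive definite, so use it as a cheap probe.
    try:
        linalg.cholesky(AA)
        return 1
    except linalg.LinAlgError:
        return 0

print(posdef(np.eye(3)))                       # 1
print(posdef(np.array([[0., 1.], [1., 0.]])))  # 0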

pymc3/gp/gp.py

+1 -1
@@ -10,7 +10,7 @@
 from pymc3.gp.util import (conditioned_vars, infer_shape,
                            stabilize, cholesky, solve_lower, solve_upper)
 from pymc3.distributions import draw_values
-from pymc3.distributions.dist_math import eigh
+from theano.tensor.nlinalg import eigh
 from ..math import cartesian, kron_dot, kron_diag

 __all__ = ['Latent', 'Marginal', 'TP', 'MarginalSparse', 'MarginalKron']
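gp.py now takes eigh from theano itself rather than from the copy that this commit deletes from dist_math.py. A minimal sketch of the symbolic Op the new import provides:

import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.nlinalg import eigh

A = tt.dmatrix('A')
w, v = eigh(A)  # eigenvalues and eigenvectors of a symmetric matrix
eig_fn = theano.function([A], [w, v])

vals, vecs = eig_fn(np.array([[2., 1.], [1., 2.]]))
print(vals)     # approximately [1., 3.]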

0 commit comments
