@@ -10,12 +10,11 @@
 import theano.tensor as tt
 import theano
 from theano.scalar import UnaryScalarOp, upgrade_to_float
+from theano.tensor.slinalg import Cholesky
 
 from .special import gammaln
 from pymc3.theanof import floatX
 
-from six.moves import xrange
-from functools import partial
 
 f = floatX
 c = - .5 * np.log(2. * np.pi)
@@ -166,7 +165,7 @@ def MvNormalLogp():
 
     solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
    solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
-    cholesky = Cholesky(nofail=True, lower=True)
+    cholesky = Cholesky(lower=True, on_error='nan')
 
     n, k = delta.shape
     n, k = f(n), f(k)
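For context (an editorial aside, not part of the commit): the upstream `theano.tensor.slinalg.Cholesky` op imported above exposes an `on_error` flag, and `on_error='nan'` reproduces what the old `nofail=True` copy did, returning NaNs instead of raising when the input is not positive definite. A minimal sketch, assuming a theano release that includes the flag:

```python
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.slinalg import Cholesky

cov = tt.dmatrix('cov')
chol = Cholesky(lower=True, on_error='nan')(cov)
fn = theano.function([cov], chol)

print(fn(np.eye(3)))                        # positive definite: the lower factor
print(fn(np.array([[1., 2.], [2., 1.]])))   # indefinite: NaN entries, no exception
```

Downstream, the logp graph can then switch on the NaN diagonal and return `-inf` rather than aborting the sampler.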
@@ -213,88 +212,6 @@ def dlogp(inputs, gradients):
         [cov, delta], [logp], grad_overrides=dlogp, inline=True)
 
 
-class Cholesky(theano.Op):
-    """
-    Return a triangular matrix square root of positive semi-definite `x`.
-
-    This is a copy of the cholesky op in theano, that doesn't throw an
-    error if the matrix is not positive definite, but instead returns
-    nan.
-
-    This has been merged upstream and we should switch to that
-    version after the next theano release.
-
-    L = cholesky(X, lower=True) implies dot(L, L.T) == X.
-    """
-    __props__ = ('lower', 'destructive', 'nofail')
-
-    def __init__(self, lower=True, nofail=False):
-        self.lower = lower
-        self.destructive = False
-        self.nofail = nofail
-
-    def make_node(self, x):
-        x = tt.as_tensor_variable(x)
-        if x.ndim != 2:
-            raise ValueError('Matrix must me two dimensional.')
-        return tt.Apply(self, [x], [x.type()])
-
-    def perform(self, node, inputs, outputs):
-        x = inputs[0]
-        z = outputs[0]
-        try:
-            z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
-        except (ValueError, scipy.linalg.LinAlgError):
-            if self.nofail:
-                z[0] = np.eye(x.shape[-1])
-                z[0][0, 0] = np.nan
-            else:
-                raise
-
-    def grad(self, inputs, gradients):
-        """
-        Cholesky decomposition reverse-mode gradient update.
-
-        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_
-
-        References
-        ----------
-        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
-           http://arxiv.org/abs/1602.07527
-
-        """
-
-        x = inputs[0]
-        dz = gradients[0]
-        chol_x = self(x)
-        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
-        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
-        dz = tt.switch(ok, dz, floatX(1))
-
-        # deal with upper triangular by converting to lower triangular
-        if not self.lower:
-            chol_x = chol_x.T
-            dz = dz.T
-
-        def tril_and_halve_diagonal(mtx):
-            """Extracts lower triangle of square matrix and halves diagonal."""
-            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)
-
-        def conjugate_solve_triangular(outer, inner):
-            """Computes L^{-T} P L^{-1} for lower-triangular L."""
-            solve = tt.slinalg.Solve(A_structure="upper_triangular")
-            return solve(outer.T, solve(outer.T, inner.T).T)
-
-        s = conjugate_solve_triangular(
-            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))
-
-        if self.lower:
-            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
-        else:
-            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
-        return [tt.switch(ok, grad, floatX(np.nan))]
-
-
 class SplineWrapper(theano.Op):
     """
     Creates a theano operation from scipy.interpolate.UnivariateSpline
@@ -332,137 +249,6 @@ def grad(self, inputs, grads):
         return [x_grad * self.grad_op(x)]
 
 
-# Custom Eigh, EighGrad, and eigh are required until
-# https://github.com/Theano/Theano/pull/6557 is handled, since lambda's
-# cannot be used with pickling.
-class Eigh(tt.nlinalg.Eig):
-    """
-    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
-
-    This is a copy of Eigh from theano that calls an EighGrad which uses
-    partial instead of lambda. Once this has been merged with theano this
-    should be removed.
-    """
-
-    _numop = staticmethod(np.linalg.eigh)
-    __props__ = ('UPLO',)
-
-    def __init__(self, UPLO='L'):
-        assert UPLO in ['L', 'U']
-        self.UPLO = UPLO
-
-    def make_node(self, x):
-        x = tt.as_tensor_variable(x)
-        assert x.ndim == 2
-        # Numpy's linalg.eigh may return either double or single
-        # presision eigenvalues depending on installed version of
-        # LAPACK. Rather than trying to reproduce the (rather
-        # involved) logic, we just probe linalg.eigh with a trivial
-        # input.
-        w_dtype = self._numop([[np.dtype(x.dtype).type()]])[0].dtype.name
-        w = theano.tensor.vector(dtype=w_dtype)
-        v = theano.tensor.matrix(dtype=x.dtype)
-        return theano.gof.Apply(self, [x], [w, v])
-
-    def perform(self, node, inputs, outputs):
-        (x,) = inputs
-        (w, v) = outputs
-        w[0], v[0] = self._numop(x, self.UPLO)
-
-    def grad(self, inputs, g_outputs):
-        r"""The gradient function should return
-        .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
-                        {\partial a_{ij}} +
-                  \sum_k V_{nk}\frac{\partial\,v_{nk}}
-                        {\partial a_{ij}}\right),
-        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
-        :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
-        Analytic formulae for eigensystem gradients are well-known in
-        perturbation theory:
-        .. math:: \frac{\partial\,w_n}
-                       {\partial a_{ij}} = v_{in}\,v_{jn}
-        .. math:: \frac{\partial\,v_{kn}}
-                       {\partial a_{ij}} =
-                  \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
-        """
-        x, = inputs
-        w, v = self(x)
-        # Replace gradients wrt disconnected variables with
-        # zeros. This is a work-around for issue #1063.
-        gw, gv = tt.nlinalg._zero_disconnected([w, v], g_outputs)
-        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
-
-
-class EighGrad(theano.Op):
-    """
-    Gradient of an eigensystem of a Hermitian matrix.
-
-    This is a copy of EighGrad from theano that uses partial instead of lambda.
-    Once this has been merged with theano this should be removed.
-    """
-
-    __props__ = ('UPLO',)
-
-    def __init__(self, UPLO='L'):
-        assert UPLO in ['L', 'U']
-        self.UPLO = UPLO
-        if UPLO == 'L':
-            self.tri0 = np.tril
-            self.tri1 = partial(np.triu, k=1)
-        else:
-            self.tri0 = np.triu
-            self.tri1 = partial(np.tril, k=-1)
-
-    def make_node(self, x, w, v, gw, gv):
-        x, w, v, gw, gv = map(tt.as_tensor_variable, (x, w, v, gw, gv))
-        assert x.ndim == 2
-        assert w.ndim == 1
-        assert v.ndim == 2
-        assert gw.ndim == 1
-        assert gv.ndim == 2
-        out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
-                                         gw.dtype, gv.dtype)
-        out = theano.tensor.matrix(dtype=out_dtype)
-        return theano.gof.Apply(self, [x, w, v, gw, gv], [out])
-
-    def perform(self, node, inputs, outputs):
-        """
-        Implements the "reverse-mode" gradient for the eigensystem of
-        a square matrix.
-        """
-        x, w, v, W, V = inputs
-        N = x.shape[0]
-        outer = np.outer
-
-        def G(n):
-            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
-                       for m in xrange(N) if m != n)
-
-        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
-                for n in xrange(N))
-
-        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
-        # (triu(a)) only. This means that partial derivative of
-        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
-        # for i < j (i > j). At the same time, non-zero components of
-        # the gradient must account for the fact that variation of the
-        # opposite triangle contributes to variation of two elements
-        # of Hermitian (symmetric) matrix. The following line
-        # implements the necessary logic.
-        out = self.tri0(g) + self.tri1(g).T
-
-        # Make sure we return the right dtype even if NumPy performed
-        # upcasting in self.tri0.
-        outputs[0][0] = np.asarray(out, dtype=node.outputs[0].dtype)
-
-    def infer_shape(self, node, shapes):
-        return [shapes[0]]
-
-
-def eigh(a, UPLO='L'):
-    """A copy, remove with Eigh and EighGrad when possible"""
-    return Eigh(UPLO)(a)
-
 
 class I0e(UnaryScalarOp):
     """
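A last editorial aside, on why the deleted `Eigh`/`EighGrad` copies existed at all: per the removed comment, theano's originals stored `lambda`s on the instance, and `pickle` refuses to serialize lambdas, while `functools.partial` objects built from module-level functions pickle fine. A minimal sketch of the difference:

```python
import pickle
from functools import partial

import numpy as np

# A partial over a module-level function serializes without trouble.
pickle.dumps(partial(np.triu, k=1))

# The equivalent lambda fails, because pickle serializes functions by
# name lookup and a lambda has no importable name.
try:
    pickle.dumps(lambda m: np.triu(m, k=1))
except (pickle.PicklingError, AttributeError):
    print('lambdas cannot be pickled')
```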