Skip to content

Commit ab96954

Browse files
fujiisoupshoyer
authored andcommitted
implement Gradient (#2398)
* Added xr.gradient, DataArray.gradient, Dataset.gradient * Working with np.backend * test is not passing * Docs * flake8 * support environment without dask * Support numpy < 1.13 * Support numpy 1.12 * simplify dask.gradient * lint * Use npcompat.gradient in tests * move gradient to dask_array_compat * gradient -> differentiate * lint * Update dask_array_compat * Added a link from diff * remove xr.differentiate * Added datetime support * Update via comment. Use utils.to_numeric also in interp * time_unit -> datetime_unit * Some more info in docs. * update test * Update via comments * Update docs.
1 parent c1c576f commit ab96954

13 files changed

+586
-10
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ Computation
150150
Dataset.resample
151151
Dataset.diff
152152
Dataset.quantile
153+
Dataset.differentiate
153154

154155
**Aggregation**:
155156
:py:attr:`~Dataset.all`
@@ -317,6 +318,7 @@ Computation
317318
DataArray.diff
318319
DataArray.dot
319320
DataArray.quantile
321+
DataArray.differentiate
320322

321323
**Aggregation**:
322324
:py:attr:`~DataArray.all`

doc/computation.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,29 @@ You can also use ``construct`` to compute a weighted rolling sum:
200200
To avoid this, use ``skipna=False`` as the above example.
201201

202202

203+
Computation using Coordinates
204+
=============================
205+
206+
Xarray objects have some handy methods for the computation with their
207+
coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by
208+
central finite differences using their coordinates,
209+
210+
.. ipython:: python
211+
a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[0.1, 0.11, 0.2, 0.3])
212+
a
213+
a.differentiate('x')
214+
215+
This method can be used also for multidimensional arrays,
216+
217+
.. ipython:: python
218+
a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'],
219+
coords=[0.1, 0.11, 0.2, 0.3])
220+
a.differentiate('x')
221+
222+
.. note::
223+
This method is limited to simple cartesian geometry. Differentiation along
224+
multidimensional coordinate is not supported.
225+
203226
.. _compute.broadcasting:
204227

205228
Broadcasting by dimension name

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ Documentation
3636
Enhancements
3737
~~~~~~~~~~~~
3838

39+
- :py:meth:`~xarray.DataArray.differentiate` and
40+
:py:meth:`~xarray.Dataset.differentiate` are newly added.
41+
(:issue:`1332`)
42+
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
3943
- Default colormap for sequential and divergent data can now be set via
4044
:py:func:`~xarray.set_options()`
4145
(:issue:`2394`)

xarray/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .core.alignment import align, broadcast, broadcast_arrays
1111
from .core.common import full_like, zeros_like, ones_like
1212
from .core.combine import concat, auto_combine
13-
from .core.computation import apply_ufunc, where, dot
13+
from .core.computation import apply_ufunc, dot, where
1414
from .core.extensions import (register_dataarray_accessor,
1515
register_dataset_accessor)
1616
from .core.variable import as_variable, Variable, IndexVariable, Coordinate

xarray/core/dask_array_compat.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from __future__ import absolute_import, division, print_function
22

3+
from distutils.version import LooseVersion
4+
35
import numpy as np
6+
from dask import __version__ as dask_version
47
import dask.array as da
58

69
try:
@@ -30,3 +33,124 @@ def isin(element, test_elements, assume_unique=False, invert=False):
3033
if invert:
3134
result = ~result
3235
return result
36+
37+
38+
if LooseVersion(dask_version) > LooseVersion('1.19.2'):
39+
gradient = da.gradient
40+
41+
else: # pragma: no cover
42+
# Copied from dask v0.19.2
43+
# Used under the terms of Dask's license, see licenses/DASK_LICENSE.
44+
import math
45+
from numbers import Integral, Real
46+
47+
AxisError = np.AxisError
48+
49+
def validate_axis(axis, ndim):
50+
""" Validate an input to axis= keywords """
51+
if isinstance(axis, (tuple, list)):
52+
return tuple(validate_axis(ax, ndim) for ax in axis)
53+
if not isinstance(axis, Integral):
54+
raise TypeError("Axis value must be an integer, got %s" % axis)
55+
if axis < -ndim or axis >= ndim:
56+
raise AxisError("Axis %d is out of bounds for array of dimension "
57+
"%d" % (axis, ndim))
58+
if axis < 0:
59+
axis += ndim
60+
return axis
61+
62+
def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs):
63+
"""
64+
x: nd-array
65+
array of one block
66+
coord: 1d-array or scalar
67+
coordinate along which the gradient is computed.
68+
axis: int
69+
axis along which the gradient is computed
70+
array_locs:
71+
actual location along axis. None if coordinate is scalar
72+
grad_kwargs:
73+
keyword to be passed to np.gradient
74+
"""
75+
block_loc = block_id[axis]
76+
if array_locs is not None:
77+
coord = coord[array_locs[0][block_loc]:array_locs[1][block_loc]]
78+
grad = np.gradient(x, coord, axis=axis, **grad_kwargs)
79+
return grad
80+
81+
def gradient(f, *varargs, **kwargs):
82+
f = da.asarray(f)
83+
84+
kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1))
85+
if kwargs["edge_order"] > 2:
86+
raise ValueError("edge_order must be less than or equal to 2.")
87+
88+
drop_result_list = False
89+
axis = kwargs.pop("axis", None)
90+
if axis is None:
91+
axis = tuple(range(f.ndim))
92+
elif isinstance(axis, Integral):
93+
drop_result_list = True
94+
axis = (axis,)
95+
96+
axis = validate_axis(axis, f.ndim)
97+
98+
if len(axis) != len(set(axis)):
99+
raise ValueError("duplicate axes not allowed")
100+
101+
axis = tuple(ax % f.ndim for ax in axis)
102+
103+
if varargs == ():
104+
varargs = (1,)
105+
if len(varargs) == 1:
106+
varargs = len(axis) * varargs
107+
if len(varargs) != len(axis):
108+
raise TypeError(
109+
"Spacing must either be a single scalar, or a scalar / "
110+
"1d-array per axis"
111+
)
112+
113+
if issubclass(f.dtype.type, (np.bool8, Integral)):
114+
f = f.astype(float)
115+
elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4:
116+
f = f.astype(float)
117+
118+
results = []
119+
for i, ax in enumerate(axis):
120+
for c in f.chunks[ax]:
121+
if np.min(c) < kwargs["edge_order"] + 1:
122+
raise ValueError(
123+
'Chunk size must be larger than edge_order + 1. '
124+
'Minimum chunk for aixs {} is {}. Rechunk to '
125+
'proceed.'.format(np.min(c), ax))
126+
127+
if np.isscalar(varargs[i]):
128+
array_locs = None
129+
else:
130+
if isinstance(varargs[i], da.Array):
131+
raise NotImplementedError(
132+
'dask array coordinated is not supported.')
133+
# coordinate position for each block taking overlap into
134+
# account
135+
chunk = np.array(f.chunks[ax])
136+
array_loc_stop = np.cumsum(chunk) + 1
137+
array_loc_start = array_loc_stop - chunk - 2
138+
array_loc_stop[-1] -= 1
139+
array_loc_start[0] = 0
140+
array_locs = (array_loc_start, array_loc_stop)
141+
142+
results.append(f.map_overlap(
143+
_gradient_kernel,
144+
dtype=f.dtype,
145+
depth={j: 1 if j == ax else 0 for j in range(f.ndim)},
146+
boundary="none",
147+
coord=varargs[i],
148+
axis=ax,
149+
array_locs=array_locs,
150+
grad_kwargs=kwargs,
151+
))
152+
153+
if drop_result_list:
154+
results = results[0]
155+
156+
return results

xarray/core/dataarray.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2073,6 +2073,9 @@ def diff(self, dim, n=1, label='upper'):
20732073
Coordinates:
20742074
* x (x) int64 3 4
20752075
2076+
See Also
2077+
--------
2078+
DataArray.differentiate
20762079
"""
20772080
ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label)
20782081
return self._from_temp_dataset(ds)
@@ -2352,6 +2355,61 @@ def rank(self, dim, pct=False, keep_attrs=False):
23522355
ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
23532356
return self._from_temp_dataset(ds)
23542357

2358+
def differentiate(self, coord, edge_order=1, datetime_unit=None):
2359+
""" Differentiate the array with the second order accurate central
2360+
differences.
2361+
2362+
.. note::
2363+
This feature is limited to simple cartesian geometry, i.e. coord
2364+
must be one dimensional.
2365+
2366+
Parameters
2367+
----------
2368+
coord: str
2369+
The coordinate to be used to compute the gradient.
2370+
edge_order: 1 or 2. Default 1
2371+
N-th order accurate differences at the boundaries.
2372+
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
2373+
'us', 'ns', 'ps', 'fs', 'as'}
2374+
Unit to compute gradient. Only valid for datetime coordinate.
2375+
2376+
Returns
2377+
-------
2378+
differentiated: DataArray
2379+
2380+
See also
2381+
--------
2382+
numpy.gradient: corresponding numpy function
2383+
2384+
Examples
2385+
--------
2386+
2387+
>>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
2388+
... coords={'x': [0, 0.1, 1.1, 1.2]})
2389+
>>> da
2390+
<xarray.DataArray (x: 4, y: 3)>
2391+
array([[ 0, 1, 2],
2392+
[ 3, 4, 5],
2393+
[ 6, 7, 8],
2394+
[ 9, 10, 11]])
2395+
Coordinates:
2396+
* x (x) float64 0.0 0.1 1.1 1.2
2397+
Dimensions without coordinates: y
2398+
>>>
2399+
>>> da.differentiate('x')
2400+
<xarray.DataArray (x: 4, y: 3)>
2401+
array([[30. , 30. , 30. ],
2402+
[27.545455, 27.545455, 27.545455],
2403+
[27.545455, 27.545455, 27.545455],
2404+
[30. , 30. , 30. ]])
2405+
Coordinates:
2406+
* x (x) float64 0.0 0.1 1.1 1.2
2407+
Dimensions without coordinates: y
2408+
"""
2409+
ds = self._to_temp_dataset().differentiate(
2410+
coord, edge_order, datetime_unit)
2411+
return self._from_temp_dataset(ds)
2412+
23552413

23562414
# priority most be higher than Variable to properly work with binary ufuncs
23572415
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)

xarray/core/dataset.py

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
import xarray as xr
1414

1515
from . import (
16-
alignment, duck_array_ops, formatting, groupby, indexing, ops, resample,
17-
rolling, utils)
16+
alignment, computation, duck_array_ops, formatting, groupby, indexing, ops,
17+
resample, rolling, utils)
1818
from .. import conventions
1919
from .alignment import align
2020
from .common import (
@@ -31,7 +31,7 @@
3131
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
3232
from .utils import (
3333
Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values,
34-
ensure_us_time_resolution, hashable, maybe_wrap_array)
34+
ensure_us_time_resolution, hashable, maybe_wrap_array, to_numeric)
3535
from .variable import IndexVariable, Variable, as_variable, broadcast_variables
3636

3737
# list of attributes of pd.DatetimeIndex that are ndarrays of time info
@@ -3417,6 +3417,9 @@ def diff(self, dim, n=1, label='upper'):
34173417
Data variables:
34183418
foo (x) int64 1 -1
34193419
3420+
See Also
3421+
--------
3422+
Dataset.differentiate
34203423
"""
34213424
if n == 0:
34223425
return self
@@ -3767,6 +3770,62 @@ def rank(self, dim, pct=False, keep_attrs=False):
37673770
attrs = self.attrs if keep_attrs else None
37683771
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
37693772

3773+
def differentiate(self, coord, edge_order=1, datetime_unit=None):
3774+
""" Differentiate with the second order accurate central
3775+
differences.
3776+
3777+
.. note::
3778+
This feature is limited to simple cartesian geometry, i.e. coord
3779+
must be one dimensional.
3780+
3781+
Parameters
3782+
----------
3783+
coord: str
3784+
The coordinate to be used to compute the gradient.
3785+
edge_order: 1 or 2. Default 1
3786+
N-th order accurate differences at the boundaries.
3787+
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
3788+
'us', 'ns', 'ps', 'fs', 'as'}
3789+
Unit to compute gradient. Only valid for datetime coordinate.
3790+
3791+
Returns
3792+
-------
3793+
differentiated: Dataset
3794+
3795+
See also
3796+
--------
3797+
numpy.gradient: corresponding numpy function
3798+
"""
3799+
from .variable import Variable
3800+
3801+
if coord not in self.variables and coord not in self.dims:
3802+
raise ValueError('Coordinate {} does not exist.'.format(coord))
3803+
3804+
coord_var = self[coord].variable
3805+
if coord_var.ndim != 1:
3806+
raise ValueError('Coordinate {} must be 1 dimensional but is {}'
3807+
' dimensional'.format(coord, coord_var.ndim))
3808+
3809+
dim = coord_var.dims[0]
3810+
coord_data = coord_var.data
3811+
if coord_data.dtype.kind in 'mM':
3812+
if datetime_unit is None:
3813+
datetime_unit, _ = np.datetime_data(coord_data.dtype)
3814+
coord_data = to_numeric(coord_data, datetime_unit=datetime_unit)
3815+
3816+
variables = OrderedDict()
3817+
for k, v in self.variables.items():
3818+
if (k in self.data_vars and dim in v.dims and
3819+
k not in self.coords):
3820+
v = to_numeric(v, datetime_unit=datetime_unit)
3821+
grad = duck_array_ops.gradient(
3822+
v.data, coord_data, edge_order=edge_order,
3823+
axis=v.get_axis_num(dim))
3824+
variables[k] = Variable(v.dims, grad)
3825+
else:
3826+
variables[k] = v
3827+
return self._replace_vars_and_dims(variables)
3828+
37703829
@property
37713830
def real(self):
37723831
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)

xarray/core/duck_array_ops.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,14 @@ def isnull(data):
9393
einsum = _dask_or_eager_func('einsum', array_args=slice(1, None),
9494
requires_dask='0.17.3')
9595

96+
97+
def gradient(x, coord, axis, edge_order):
98+
if isinstance(x, dask_array_type):
99+
return dask_array_compat.gradient(
100+
x, coord, axis=axis, edge_order=edge_order)
101+
return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)
102+
103+
96104
masked_invalid = _dask_or_eager_func(
97105
'masked_invalid', eager_module=np.ma,
98106
dask_module=getattr(dask_array, 'ma', None))

0 commit comments

Comments
 (0)