Skip to content

Commit 49731d4

Browse files
spencerkclarkshoyer
authored andcommitted
Use built-in interp for interpolation with resample (#2640)
* Use built-in interp for interpolation with resample Use bounds_error=False in interpolate * Add test for 2197 * lint * flake8
1 parent c62c4fa commit 49731d4

File tree

3 files changed

+58
-93
lines changed

3 files changed

+58
-93
lines changed

doc/whats-new.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,19 @@ Breaking changes
2828
Enhancements
2929
~~~~~~~~~~~~
3030

31+
- Upsampling an array via interpolation with resample is now dask-compatible,
32+
as long as the array is not chunked along the resampling dimension.
33+
By `Spencer Clark <https://github.com/spencerkclark>`_.
34+
3135
Bug fixes
3236
~~~~~~~~~
3337

38+
- Interpolating via resample now internally specifies ``bounds_error=False``
39+
as an argument to ``scipy.interpolate.interp1d``, allowing for interpolation
40+
from higher frequencies to lower frequencies. Datapoints outside the bounds
41+
of the original time coordinate are now filled with NaN (:issue:`2197`). By
42+
`Spencer Clark <https://github.com/spencerkclark>`_.
43+
3444
.. _whats-new.0.11.1:
3545

3646
v0.11.1 (29 December 2018)

xarray/core/resample.py

Lines changed: 10 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from . import ops
44
from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy
5-
from .pycompat import OrderedDict, dask_array_type
65

76
RESAMPLE_DIM = '__resample_dim__'
87

@@ -110,7 +109,16 @@ def interpolate(self, kind='linear'):
110109
return self._interpolate(kind=kind)
111110

112111
def _interpolate(self, kind='linear'):
113-
raise NotImplementedError
112+
"""Apply scipy.interpolate.interp1d along resampling dimension."""
113+
# drop any existing non-dimension coordinates along the resampling
114+
# dimension
115+
dummy = self._obj.copy()
116+
for k, v in self._obj.coords.items():
117+
if k != self._dim and self._dim in v.dims:
118+
dummy = dummy.drop(k)
119+
return dummy.interp(assume_sorted=True, method=kind,
120+
kwargs={'bounds_error': False},
121+
**{self._dim: self._full_index})
114122

115123

116124
class DataArrayResample(DataArrayGroupBy, Resample):
@@ -182,46 +190,6 @@ def apply(self, func, shortcut=False, args=(), **kwargs):
182190

183191
return combined
184192

185-
def _interpolate(self, kind='linear'):
186-
"""Apply scipy.interpolate.interp1d along resampling dimension."""
187-
from .dataarray import DataArray
188-
from scipy.interpolate import interp1d
189-
190-
if isinstance(self._obj.data, dask_array_type):
191-
raise TypeError(
192-
"Up-sampling via interpolation was attempted on the the "
193-
"variable '{}', but it is a dask array; dask arrays are not "
194-
"yet supported in resample.interpolate(). Load into "
195-
"memory with Dataset.load() before resampling."
196-
.format(self._obj.data.name)
197-
)
198-
199-
x = self._obj[self._dim].astype('float')
200-
y = self._obj.data
201-
202-
axis = self._obj.get_axis_num(self._dim)
203-
204-
f = interp1d(x, y, kind=kind, axis=axis, bounds_error=True,
205-
assume_sorted=True)
206-
new_x = self._full_index.values.astype('float')
207-
208-
# construct new up-sampled DataArray
209-
dummy = self._obj.copy()
210-
dims = dummy.dims
211-
212-
# drop any existing non-dimension coordinates along the resampling
213-
# dimension
214-
coords = OrderedDict()
215-
for k, v in dummy.coords.items():
216-
# is the resampling dimension
217-
if k == self._dim:
218-
coords[self._dim] = self._full_index
219-
# else, check if resampling dim is in coordinate dimensions
220-
elif self._dim not in v.dims:
221-
coords[k] = v
222-
return DataArray(f(new_x), coords, dims, name=dummy.name,
223-
attrs=dummy.attrs)
224-
225193

226194
ops.inject_reduce_methods(DataArrayResample)
227195
ops.inject_binary_ops(DataArrayResample)
@@ -308,50 +276,6 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
308276
return super(DatasetResample, self).reduce(
309277
func, dim, keep_attrs, **kwargs)
310278

311-
def _interpolate(self, kind='linear'):
312-
"""Apply scipy.interpolate.interp1d along resampling dimension."""
313-
from .dataset import Dataset
314-
from .variable import Variable
315-
from scipy.interpolate import interp1d
316-
317-
old_times = self._obj[self._dim].astype(float)
318-
new_times = self._full_index.values.astype(float)
319-
320-
data_vars = OrderedDict()
321-
coords = OrderedDict()
322-
323-
# Apply the interpolation to each DataArray in our original Dataset
324-
for name, variable in self._obj.variables.items():
325-
if name in self._obj.coords:
326-
if name == self._dim:
327-
coords[self._dim] = self._full_index
328-
elif self._dim not in variable.dims:
329-
coords[name] = variable
330-
else:
331-
if isinstance(variable.data, dask_array_type):
332-
raise TypeError(
333-
"Up-sampling via interpolation was attempted on the "
334-
"variable '{}', but it is a dask array; dask arrays "
335-
"are not yet supprted in resample.interpolate(). Load "
336-
"into memory with Dataset.load() before resampling."
337-
.format(name)
338-
)
339-
340-
axis = variable.get_axis_num(self._dim)
341-
342-
# We've previously checked for monotonicity along the
343-
# re-sampling dimension (in __init__ via the GroupBy
344-
# constructor), so we can avoid sorting the data again by
345-
# passing 'assume_sorted=True'
346-
f = interp1d(old_times, variable.data, kind=kind,
347-
axis=axis, bounds_error=True,
348-
assume_sorted=True)
349-
interpolated = Variable(variable.dims, f(new_times))
350-
351-
data_vars[name] = interpolated
352-
353-
return Dataset(data_vars, coords)
354-
355279

356280
ops.inject_reduce_methods(DatasetResample)
357281
ops.inject_binary_ops(DatasetResample)

xarray/tests/test_dataarray.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2524,6 +2524,16 @@ def test_upsample_interpolate(self):
25242524
# done here due to floating point arithmetic
25252525
assert_allclose(expected, actual, rtol=1e-16)
25262526

2527+
@requires_scipy
2528+
def test_upsample_interpolate_bug_2197(self):
2529+
dates = pd.date_range('2007-02-01', '2007-03-01', freq='D')
2530+
da = xr.DataArray(np.arange(len(dates)), [('time', dates)])
2531+
result = da.resample(time='M').interpolate('linear')
2532+
expected_times = np.array([np.datetime64('2007-02-28'),
2533+
np.datetime64('2007-03-31')])
2534+
expected = xr.DataArray([27., np.nan], [('time', expected_times)])
2535+
assert_equal(result, expected)
2536+
25272537
@requires_scipy
25282538
def test_upsample_interpolate_regression_1605(self):
25292539
dates = pd.date_range('2016-01-01', '2016-03-31', freq='1D')
@@ -2536,21 +2546,42 @@ def test_upsample_interpolate_regression_1605(self):
25362546
@requires_dask
25372547
@requires_scipy
25382548
def test_upsample_interpolate_dask(self):
2539-
import dask.array as da
2540-
2541-
times = pd.date_range('2000-01-01', freq='6H', periods=5)
2549+
from scipy.interpolate import interp1d
25422550
xs = np.arange(6)
25432551
ys = np.arange(3)
2552+
times = pd.date_range('2000-01-01', freq='6H', periods=5)
25442553

25452554
z = np.arange(5)**2
2546-
data = da.from_array(np.tile(z, (6, 3, 1)), (1, 3, 1))
2555+
data = np.tile(z, (6, 3, 1))
25472556
array = DataArray(data,
25482557
{'time': times, 'x': xs, 'y': ys},
25492558
('x', 'y', 'time'))
2559+
chunks = {'x': 2, 'y': 1}
2560+
2561+
expected_times = times.to_series().resample('1H').asfreq().index
2562+
# Split the times into equal sub-intervals to simulate the 6 hour
2563+
# to 1 hour up-sampling
2564+
new_times_idx = np.linspace(0, len(times) - 1, len(times) * 5)
2565+
for kind in ['linear', 'nearest', 'zero', 'slinear', 'quadratic',
2566+
'cubic']:
2567+
actual = array.chunk(chunks).resample(time='1H').interpolate(kind)
2568+
actual = actual.compute()
2569+
f = interp1d(np.arange(len(times)), data, kind=kind, axis=-1,
2570+
bounds_error=True, assume_sorted=True)
2571+
expected_data = f(new_times_idx)
2572+
expected = DataArray(expected_data,
2573+
{'time': expected_times, 'x': xs, 'y': ys},
2574+
('x', 'y', 'time'))
2575+
# Use AllClose because there are some small differences in how
2576+
# we upsample timeseries versus the integer indexing as I've
2577+
# done here due to floating point arithmetic
2578+
assert_allclose(expected, actual, rtol=1e-16)
25502579

2551-
with raises_regex(TypeError,
2552-
"dask arrays are not yet supported"):
2553-
array.resample(time='1H').interpolate('linear')
2580+
# Check that an error is raised if an attempt is made to interpolate
2581+
# over a chunked dimension
2582+
with raises_regex(NotImplementedError,
2583+
'Chunking along the dimension to be interpolated'):
2584+
array.chunk({'time': 1}).resample(time='1H').interpolate('linear')
25542585

25552586
def test_align(self):
25562587
array = DataArray(np.random.random((6, 8)),

0 commit comments

Comments
 (0)