Skip to content

Commit 3cc0d22

Browse files
observingClouds authored and shoyer committed
ENH: resample methods with tolerance (#2716)
* ENH: resample methods with tolerance
* ENH: resample methods bfill, pad, nearest accept tolerance keyword
* DOC: documentation is updated with examples
  Fixes: GH2695
* TST: Upsampling with tolerance keyword
  Include tests for GH2695
* pep8
* Make resample().nearest(tolerance) test meaningful
* DOC: Mention units of tolerance
1 parent ce3ef3a commit 3cc0d22

File tree

5 files changed

+82
-9
lines changed

5 files changed

+82
-9
lines changed

doc/time-series.rst

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,20 @@ resampling group:
196196
197197
ds.resample(time='6H').reduce(np.mean)
198198
199-
For upsampling, xarray provides four methods: ``asfreq``, ``ffill``, ``bfill``,
200-
and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` and
201-
supports all of its schemes. All of these resampling operations work on both
199+
For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``,
200+
``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d``
201+
and supports all of its schemes. All of these resampling operations work on both
202202
Dataset and DataArray objects with an arbitrary number of dimensions.
203203

204+
In order to limit the scope of the methods ``ffill``, ``bfill``, ``pad`` and
205+
``nearest``, the ``tolerance`` argument can be set in coordinate units.
206+
Data whose indices lie outside the given ``tolerance`` are set to ``NaN``.
207+
208+
.. ipython:: python
209+
210+
ds.resample(time='1H').nearest(tolerance='1H')
211+
212+
204213
For more examples of using grouped operations on a time dimension, see
205214
:ref:`toy weather data`.
206215

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ Enhancements
4343
report showing what exactly differs between the two objects (dimensions /
4444
coordinates / variables / attributes) (:issue:`1507`).
4545
By `Benoit Bovy <https://github.com/benbovy>`_.
46+
- Add ``tolerance`` option to ``resample()`` methods ``bfill``, ``pad``,
47+
``nearest``. (:issue:`2695`)
48+
By `Hauke Schulz <https://github.com/observingClouds>`_.
4649

4750
Bug fixes
4851
~~~~~~~~~

xarray/core/common.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,13 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
713713
array([ 0. , 0.032258, 0.064516, ..., 10.935484, 10.967742, 11. ])
714714
Coordinates:
715715
* time (time) datetime64[ns] 1999-12-15 1999-12-16 1999-12-17 ...
716+
717+
Limit scope of upsampling method
718+
>>> da.resample(time='1D').nearest(tolerance='1D')
719+
<xarray.DataArray (time: 337)>
720+
array([ 0., 0., nan, ..., nan, 11., 11.])
721+
Coordinates:
722+
* time (time) datetime64[ns] 1999-12-15 1999-12-16 ... 2000-11-15
716723
717724
References
718725
----------

xarray/core/resample.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,53 @@ def asfreq(self):
7171
"""
7272
return self._upsample('asfreq')
7373

74-
def pad(self):
74+
def pad(self, tolerance=None):
7575
"""Forward fill new values at up-sampled frequency.
76+
77+
Parameters
78+
----------
79+
tolerance : optional
80+
Maximum distance between original and new labels to limit
81+
the up-sampling method.
82+
Up-sampled data with indices that satisfy the equation
83+
``abs(index[indexer] - target) <= tolerance`` are filled by
84+
new values. Data with indices that are outside the given
85+
tolerance are filled with ``NaN``.
7686
"""
77-
return self._upsample('pad')
87+
return self._upsample('pad', tolerance=tolerance)
7888
ffill = pad
7989

80-
def backfill(self):
90+
def backfill(self, tolerance=None):
8191
"""Backward fill new values at up-sampled frequency.
92+
93+
Parameters
94+
----------
95+
tolerance : optional
96+
Maximum distance between original and new labels to limit
97+
the up-sampling method.
98+
Up-sampled data with indices that satisfy the equation
99+
``abs(index[indexer] - target) <= tolerance`` are filled by
100+
new values. Data with indices that are outside the given
101+
tolerance are filled with ``NaN``.
82102
"""
83-
return self._upsample('backfill')
103+
return self._upsample('backfill', tolerance=tolerance)
84104
bfill = backfill
85105

86-
def nearest(self):
106+
def nearest(self, tolerance=None):
87107
"""Take new values from nearest original coordinate to up-sampled
88108
frequency coordinates.
109+
110+
Parameters
111+
----------
112+
tolerance : optional
113+
Maximum distance between original and new labels to limit
114+
the up-sampling method.
115+
Up-sampled data with indices that satisfy the equation
116+
``abs(index[indexer] - target) <= tolerance`` are filled by
117+
new values. Data with indices that are outside the given
118+
tolerance are filled with ``NaN``.
89119
"""
90-
return self._upsample('nearest')
120+
return self._upsample('nearest', tolerance=tolerance)
91121

92122
def interpolate(self, kind='linear'):
93123
"""Interpolate up-sampled data using the original data

xarray/tests/test_dataarray.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2485,6 +2485,30 @@ def test_upsample_nd(self):
24852485
('x', 'y', 'time'))
24862486
assert_identical(expected, actual)
24872487

2488+
def test_upsample_tolerance(self):
2489+
# Test tolerance keyword for upsample methods bfill, pad, nearest
2490+
times = pd.date_range('2000-01-01', freq='1D', periods=2)
2491+
times_upsampled = pd.date_range('2000-01-01', freq='6H', periods=5)
2492+
array = DataArray(np.arange(2), [('time', times)])
2493+
2494+
# Forward fill
2495+
actual = array.resample(time='6H').ffill(tolerance='12H')
2496+
expected = DataArray([0., 0., 0., np.nan, 1.],
2497+
[('time', times_upsampled)])
2498+
assert_identical(expected, actual)
2499+
2500+
# Backward fill
2501+
actual = array.resample(time='6H').bfill(tolerance='12H')
2502+
expected = DataArray([0., np.nan, 1., 1., 1.],
2503+
[('time', times_upsampled)])
2504+
assert_identical(expected, actual)
2505+
2506+
# Nearest
2507+
actual = array.resample(time='6H').nearest(tolerance='6H')
2508+
expected = DataArray([0, 0, np.nan, 1, 1],
2509+
[('time', times_upsampled)])
2510+
assert_identical(expected, actual)
2511+
24882512
@requires_scipy
24892513
def test_upsample_interpolate(self):
24902514
from scipy.interpolate import interp1d

0 commit comments

Comments
 (0)