Skip to content

Commit c770eec

Browse files
spencerkclark authored and fujiisoup committed
Add support for cftime.datetime coordinates with coarsen (#2778)
1 parent 0c534b0 commit c770eec

File tree

5 files changed

+71
-8
lines changed

5 files changed

+71
-8
lines changed

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ Enhancements
4545
See :ref:`comput.coarsen` for details.
4646
(:issue:`2525`)
4747
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
48+
- Taking the mean of arrays of :py:class:`cftime.datetime` objects, and
49+
by extension, use of :py:meth:`~xarray.DataArray.coarsen` with
50+
:py:class:`cftime.datetime` coordinates is now possible. By `Spencer Clark
51+
<https://github.com/spencerkclark>`_.
4852
- Upsampling an array via interpolation with resample is now dask-compatible,
4953
as long as the array is not chunked along the resampling dimension.
5054
By `Spencer Clark <https://github.com/spencerkclark>`_.

xarray/core/common.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -997,15 +997,15 @@ def is_np_datetime_like(dtype):
997997
np.issubdtype(dtype, np.timedelta64))
998998

999999

1000-
def contains_cftime_datetimes(var):
1001-
"""Check if a variable contains cftime datetime objects"""
1000+
def _contains_cftime_datetimes(array):
1001+
"""Check if an array contains cftime.datetime objects"""
10021002
try:
10031003
from cftime import datetime as cftime_datetime
10041004
except ImportError:
10051005
return False
10061006
else:
1007-
if var.dtype == np.dtype('O') and var.data.size > 0:
1008-
sample = var.data.ravel()[0]
1007+
if array.dtype == np.dtype('O') and array.size > 0:
1008+
sample = array.ravel()[0]
10091009
if isinstance(sample, dask_array_type):
10101010
sample = sample.compute()
10111011
if isinstance(sample, np.ndarray):
@@ -1015,6 +1015,11 @@ def contains_cftime_datetimes(var):
10151015
return False
10161016

10171017

1018+
def contains_cftime_datetimes(var):
1019+
"""Check if an xarray.Variable contains cftime.datetime objects"""
1020+
return _contains_cftime_datetimes(var.data)
1021+
1022+
10181023
def _contains_datetime_like_objects(var):
10191024
"""Check if a variable contains datetime like objects (either
10201025
np.datetime64, np.timedelta64, or cftime.datetime)"""

xarray/core/duck_array_ops.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
294294
295295
Parameters
296296
----------
297-
da : array
297+
da : np.array
298298
Input data
299299
offset: Scalar with the same type of array or None
300300
If None, subtract minimum values to reduce round off error
@@ -306,6 +306,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
306306
-------
307307
array
308308
"""
309+
# TODO: make this function dask-compatible?
309310
if offset is None:
310311
offset = array.min()
311312
array = array - offset
@@ -326,15 +327,34 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
326327
return array.astype(dtype)
327328

328329

330+
def _to_pytimedelta(array, unit='us'):
331+
index = pd.TimedeltaIndex(array.ravel(), unit=unit)
332+
return index.to_pytimedelta().reshape(array.shape)
333+
334+
329335
def mean(array, axis=None, skipna=None, **kwargs):
330-
""" inhouse mean that can handle datatime dtype """
336+
"""inhouse mean that can handle np.datetime64 or cftime.datetime
337+
dtypes"""
338+
from .common import _contains_cftime_datetimes
339+
331340
array = asarray(array)
332341
if array.dtype.kind in 'Mm':
333342
offset = min(array)
334-
# xarray always uses datetime[ns] for datetime
343+
# xarray always uses np.datetime64[ns] for np.datetime64 data
335344
dtype = 'timedelta64[ns]'
336345
return _mean(datetime_to_numeric(array, offset), axis=axis,
337346
skipna=skipna, **kwargs).astype(dtype) + offset
347+
elif _contains_cftime_datetimes(array):
348+
if isinstance(array, dask_array_type):
349+
raise NotImplementedError(
350+
'Computing the mean of an array containing '
351+
'cftime.datetime objects is not yet implemented on '
352+
'dask arrays.')
353+
offset = min(array)
354+
timedeltas = datetime_to_numeric(array, offset, datetime_unit='us')
355+
mean_timedeltas = _mean(timedeltas, axis=axis, skipna=skipna,
356+
**kwargs)
357+
return _to_pytimedelta(mean_timedeltas, unit='us') + offset
338358
else:
339359
return _mean(array, axis=axis, skipna=skipna, **kwargs)
340360

xarray/tests/test_dataset.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
InaccessibleArray, UnexpectedDataAccess, assert_allclose,
2424
assert_array_equal, assert_equal, assert_identical, has_cftime, has_dask,
2525
raises_regex, requires_bottleneck, requires_dask, requires_scipy,
26-
source_ndarray)
26+
source_ndarray, requires_cftime)
2727

2828
try:
2929
import dask.array as da
@@ -4530,6 +4530,15 @@ def test_coarsen_coords(ds, dask):
45304530
actual = da.coarsen(time=2).mean()
45314531

45324532

4533+
@requires_cftime
4534+
def test_coarsen_coords_cftime():
4535+
times = xr.cftime_range('2000', periods=6)
4536+
da = xr.DataArray(range(6), [('time', times)])
4537+
actual = da.coarsen(time=3).mean()
4538+
expected_times = xr.cftime_range('2000-01-02', freq='3D', periods=2)
4539+
np.testing.assert_array_equal(actual.time, expected_times)
4540+
4541+
45334542
def test_rolling_properties(ds):
45344543
# catching invalid args
45354544
with pytest.raises(ValueError) as exception:

xarray/tests/test_duck_array_ops.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,31 @@ def test_datetime_reduce(dask):
270270
assert da['time'][0].mean() == da['time'][:1].mean()
271271

272272

273+
@requires_cftime
274+
def test_cftime_datetime_mean():
275+
times = cftime_range('2000', periods=4)
276+
da = DataArray(times, dims=['time'])
277+
278+
assert da.isel(time=0).mean() == da.isel(time=0)
279+
280+
expected = DataArray(times.date_type(2000, 1, 2, 12))
281+
result = da.mean()
282+
assert_equal(result, expected)
283+
284+
da_2d = DataArray(times.values.reshape(2, 2))
285+
result = da_2d.mean()
286+
assert_equal(result, expected)
287+
288+
289+
@requires_cftime
290+
@requires_dask
291+
def test_cftime_datetime_mean_dask_error():
292+
times = cftime_range('2000', periods=4)
293+
da = DataArray(times, dims=['time']).chunk()
294+
with pytest.raises(NotImplementedError):
295+
da.mean()
296+
297+
273298
@pytest.mark.parametrize('dim_num', [1, 2])
274299
@pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_])
275300
@pytest.mark.parametrize('dask', [False, True])

0 commit comments

Comments
 (0)