Skip to content

Commit 0e4bc7a

Browse files
fujiisoup authored and dcherian committed
WIP: fix regression about datetime_to_numeric
1 parent 56bc724 commit 0e4bc7a

File tree

10 files changed

+118
-92
lines changed

10 files changed

+118
-92
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Bug fixes
5151

5252
- Silenced warnings that appear when using pandas 0.24.
5353
By `Stephan Hoyer <https://github.com/shoyer>`_
54+
- Bug fix for interpolation with a datetime array. (:issue:`2668`)
55+
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
5456
- Interpolating via resample now internally specifies ``bounds_error=False``
5557
as an argument to ``scipy.interpolate.interp1d``, allowing for interpolation
5658
from higher frequencies to lower frequencies. Datapoints outside the bounds

xarray/core/dataset.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
from .options import OPTIONS, _get_keep_attrs
3131
from .pycompat import dask_array_type
3232
from .utils import (
33-
Frozen, SortedKeysDict, _check_inplace, datetime_to_numeric,
34-
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution,
35-
hashable, maybe_wrap_array)
33+
Frozen, SortedKeysDict, _check_inplace, decode_numpy_dict_values,
34+
either_dict_or_kwargs, ensure_us_time_resolution, hashable,
35+
maybe_wrap_array)
3636
from .variable import IndexVariable, Variable, as_variable, broadcast_variables
3737
from ..plot.dataset_plot import _Dataset_PlotMethods
3838

@@ -3853,15 +3853,14 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
38533853
datetime_unit, _ = np.datetime_data(coord_var.dtype)
38543854
elif datetime_unit is None:
38553855
datetime_unit = 's' # Default to seconds for cftime objects
3856-
coord_var = datetime_to_numeric(
3857-
coord_var, datetime_unit=datetime_unit)
3856+
coord_var = coord_var._to_numeric(datetime_unit=datetime_unit)
38583857

38593858
variables = OrderedDict()
38603859
for k, v in self.variables.items():
38613860
if (k in self.data_vars and dim in v.dims and
38623861
k not in self.coords):
38633862
if _contains_datetime_like_objects(v):
3864-
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
3863+
v = v._to_numeric(datetime_unit=datetime_unit)
38653864
grad = duck_array_ops.gradient(
38663865
v.data, coord_var, edge_order=edge_order,
38673866
axis=v.get_axis_num(dim))

xarray/core/duck_array_ops.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212
import pandas as pd
1313

14-
from . import dask_array_ops, dtypes, npcompat, nputils, utils
14+
from . import dask_array_ops, dtypes, npcompat, nputils
1515
from .nputils import nanfirst, nanlast
1616
from .pycompat import dask_array_type
1717

@@ -277,14 +277,51 @@ def f(values, axis=None, skipna=None, **kwargs):
277277
_mean = _create_nan_agg_method('mean')
278278

279279

280+
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
281+
"""Convert an array containing datetime-like data to an array of floats.
282+
283+
Parameters
284+
----------
285+
array : array
286+
Input data
287+
offset: Scalar with the same type of array or None
288+
If None, subtract minimum values to reduce round off error
289+
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
290+
'us', 'ns', 'ps', 'fs', 'as'}
291+
dtype: target dtype
292+
293+
Returns
294+
-------
295+
array
296+
"""
297+
if offset is None:
298+
offset = array.min()
299+
array = array - offset
300+
301+
if not hasattr(array, 'dtype'): # scalar is converted to 0d-array
302+
array = np.array(array)
303+
304+
if array.dtype.kind in 'O':
305+
# possibly convert object array containing datetime.timedelta
306+
array = np.asarray(pd.Series(array.ravel())).reshape(array.shape)
307+
308+
if datetime_unit:
309+
array = array / np.timedelta64(1, datetime_unit)
310+
311+
# convert np.NaT to np.nan
312+
if array.dtype.kind in 'mM':
313+
return np.where(isnull(array), np.nan, array.astype(dtype))
314+
return array.astype(dtype)
315+
316+
280317
def mean(array, axis=None, skipna=None, **kwargs):
281318
""" inhouse mean that can handle datetime dtype """
282319
array = asarray(array)
283-
if array.dtype.kind == 'M':
320+
if array.dtype.kind in 'Mm':
284321
offset = min(array)
285322
# xarray always uses datetime[ns] for datetime
286323
dtype = 'timedelta64[ns]'
287-
return _mean(utils.datetime_to_numeric(array, offset), axis=axis,
324+
return _mean(datetime_to_numeric(array, offset), axis=axis,
288325
skipna=skipna, **kwargs).astype(dtype) + offset
289326
else:
290327
return _mean(array, axis=axis, skipna=skipna, **kwargs)

xarray/core/missing.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .common import _contains_datetime_like_objects
1111
from .computation import apply_ufunc
1212
from .duck_array_ops import dask_array_type
13-
from .utils import OrderedSet, datetime_to_numeric, is_scalar
13+
from .utils import OrderedSet, is_scalar
1414
from .variable import Variable, broadcast_variables
1515

1616

@@ -411,10 +411,9 @@ def _floatize_x(x, new_x):
411411
# We assume that the most of the bits are used to represent the
412412
# offset (min(x)) and the variation (x - min(x)) can be
413413
# represented by float.
414-
xmin = x[i].min()
415-
x[i] = datetime_to_numeric(x[i], offset=xmin, dtype=np.float64)
416-
new_x[i] = datetime_to_numeric(
417-
new_x[i], offset=xmin, dtype=np.float64)
414+
xmin = x[i].values.min()
415+
x[i] = x[i]._to_numeric(offset=xmin, dtype=np.float64)
416+
new_x[i] = new_x[i]._to_numeric(offset=xmin, dtype=np.float64)
418417
return x, new_x
419418

420419

xarray/core/utils.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -603,40 +603,6 @@ def __len__(self):
603603
return len(self._data) - num_hidden
604604

605605

606-
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
607-
"""Convert an array containing datetime-like data to an array of floats.
608-
609-
Parameters
610-
----------
611-
da : array
612-
Input data
613-
offset: Scalar with the same type of array or None
614-
If None, subtract minimum values to reduce round off error
615-
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
616-
'us', 'ns', 'ps', 'fs', 'as'}
617-
dtype: target dtype
618-
619-
Returns
620-
-------
621-
array
622-
"""
623-
from . import duck_array_ops
624-
625-
if offset is None:
626-
offset = array.min()
627-
array = array - offset
628-
629-
if datetime_unit:
630-
array = array / np.timedelta64(1, datetime_unit)
631-
# convert np.NaT to np.nan
632-
if array.dtype.kind in 'mM':
633-
if hasattr(array, 'isnull'):
634-
return np.where(array.isnull(), np.nan, array.astype(dtype))
635-
return np.where(duck_array_ops.isnull(array), np.nan,
636-
array.astype(dtype))
637-
return array
638-
639-
640606
def get_temp_dimname(dims, new_dim):
641607
""" Get a new dimension name based on new_dim, that is not used in dims.
642608
If the same name exists, we add an underscore(s) in the head.

xarray/core/variable.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,14 @@ def func(self, other):
17821782
return self
17831783
return func
17841784

1785+
def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
1786+
""" A (private) method to convert datetime array to numeric dtype
1787+
See duck_array_ops.datetime_to_numeric
1788+
"""
1789+
numeric_array = duck_array_ops.datetime_to_numeric(
1790+
self.data, offset, datetime_unit, dtype)
1791+
return type(self)(self.dims, numeric_array, self._attrs)
1792+
17851793

17861794
ops.inject_all_ops_and_reduce_methods(Variable)
17871795

xarray/tests/test_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4671,7 +4671,7 @@ def test_differentiate_datetime(dask):
46714671
actual = da.differentiate('x', edge_order=1, datetime_unit='D')
46724672
expected_x = xr.DataArray(
46734673
npcompat.gradient(
4674-
da, utils.datetime_to_numeric(da['x'], datetime_unit='D'),
4674+
da, da['x'].variable._to_numeric(datetime_unit='D'),
46754675
axis=0, edge_order=1), dims=da.dims, coords=da.coords)
46764676
assert_equal(expected_x, actual)
46774677

@@ -4705,7 +4705,7 @@ def test_differentiate_cftime(dask):
47054705

47064706
actual = da.differentiate('time', edge_order=1, datetime_unit='D')
47074707
expected_data = npcompat.gradient(
4708-
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'),
4708+
da, da['time'].variable._to_numeric(datetime_unit='D'),
47094709
axis=0, edge_order=1)
47104710
expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
47114711
assert_equal(expected, actual)

xarray/tests/test_duck_array_ops.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,17 @@
77
import pytest
88
from numpy import array, nan
99

10-
from xarray import DataArray, Dataset, concat
10+
from xarray import DataArray, Dataset, concat, cftime_range
1111
from xarray.core import dtypes, duck_array_ops
1212
from xarray.core.duck_array_ops import (
1313
array_notnull_equiv, concatenate, count, first, gradient, last, mean,
1414
rolling_window, stack, where)
1515
from xarray.core.pycompat import dask_array_type
16-
from xarray.testing import assert_allclose, assert_equal
16+
from xarray.testing import assert_allclose, assert_equal, assert_identical
1717

1818
from . import (
19-
assert_array_equal, has_dask, has_np113, raises_regex, requires_dask)
19+
assert_array_equal, has_dask, has_np113, raises_regex, requires_cftime,
20+
requires_dask)
2021

2122

2223
class TestOps(object):
@@ -569,3 +570,42 @@ def test_docs():
569570
indicated dimension(s) removed.
570571
""")
571572
assert actual == expected
573+
574+
575+
def test_datetime_to_numeric_datetime64():
576+
times = pd.date_range('2000', periods=5, freq='7D').values
577+
result = duck_array_ops.datetime_to_numeric(times, datetime_unit='h')
578+
expected = 24 * np.arange(0, 35, 7)
579+
np.testing.assert_array_equal(result, expected)
580+
581+
offset = times[1]
582+
result = duck_array_ops.datetime_to_numeric(
583+
times, offset=offset, datetime_unit='h')
584+
expected = 24 * np.arange(-7, 28, 7)
585+
np.testing.assert_array_equal(result, expected)
586+
587+
dtype = np.float32
588+
result = duck_array_ops.datetime_to_numeric(
589+
times, datetime_unit='h', dtype=dtype)
590+
expected = 24 * np.arange(0, 35, 7).astype(dtype)
591+
np.testing.assert_array_equal(result, expected)
592+
593+
594+
@requires_cftime
595+
def test_datetime_to_numeric_cftime():
596+
times = cftime_range('2000', periods=5, freq='7D').values
597+
result = duck_array_ops.datetime_to_numeric(times, datetime_unit='h')
598+
expected = 24 * np.arange(0, 35, 7)
599+
np.testing.assert_array_equal(result, expected)
600+
601+
offset = times[1]
602+
result = duck_array_ops.datetime_to_numeric(
603+
times, offset=offset, datetime_unit='h')
604+
expected = 24 * np.arange(-7, 28, 7)
605+
np.testing.assert_array_equal(result, expected)
606+
607+
dtype = np.float32
608+
result = duck_array_ops.datetime_to_numeric(
609+
times, datetime_unit='h', dtype=dtype)
610+
expected = 24 * np.arange(0, 35, 7).astype(dtype)
611+
np.testing.assert_array_equal(result, expected)

xarray/tests/test_interp.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,3 +571,17 @@ def test_cftime_to_non_cftime_error():
571571

572572
with pytest.raises(TypeError):
573573
da.interp(time=0.5)
574+
575+
576+
@requires_cftime
577+
@requires_scipy
578+
def test_datetime_interp_noerror():
579+
# GH:2667
580+
a = xr.DataArray(
581+
np.arange(21).reshape(3, 7), dims=['x', 'time'],
582+
coords={'x': [1, 2, 3],
583+
'time': pd.date_range('01-01-2001', periods=7, freq='D')})
584+
xi = xr.DataArray(
585+
np.linspace(1, 3, 50), dims=['time'],
586+
coords={'time': pd.date_range('01-01-2001', periods=50, freq='H')})
587+
a.interp(x=xi, time=xi.time) # should not raise an error

xarray/tests/test_utils.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -279,42 +279,3 @@ def test_either_dict_or_kwargs():
279279

280280
with pytest.raises(ValueError, match=r'foo'):
281281
result = either_dict_or_kwargs(dict(a=1), dict(a=1), 'foo')
282-
283-
284-
def test_datetime_to_numeric_datetime64():
285-
times = pd.date_range('2000', periods=5, freq='7D')
286-
da = xr.DataArray(times, coords=[times], dims=['time'])
287-
result = utils.datetime_to_numeric(da, datetime_unit='h')
288-
expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
289-
assert_identical(result, expected)
290-
291-
offset = da.isel(time=1)
292-
result = utils.datetime_to_numeric(da, offset=offset, datetime_unit='h')
293-
expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
294-
assert_identical(result, expected)
295-
296-
dtype = np.float32
297-
result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype)
298-
expected = 24 * xr.DataArray(
299-
np.arange(0, 35, 7), coords=da.coords).astype(dtype)
300-
assert_identical(result, expected)
301-
302-
303-
@requires_cftime
304-
def test_datetime_to_numeric_cftime():
305-
times = xr.cftime_range('2000', periods=5, freq='7D')
306-
da = xr.DataArray(times, coords=[times], dims=['time'])
307-
result = utils.datetime_to_numeric(da, datetime_unit='h')
308-
expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
309-
assert_identical(result, expected)
310-
311-
offset = da.isel(time=1)
312-
result = utils.datetime_to_numeric(da, offset=offset, datetime_unit='h')
313-
expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
314-
assert_identical(result, expected)
315-
316-
dtype = np.float32
317-
result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype)
318-
expected = 24 * xr.DataArray(
319-
np.arange(0, 35, 7), coords=da.coords).astype(dtype)
320-
assert_identical(result, expected)

0 commit comments

Comments
 (0)