Skip to content

Commit 4cd56a9

Browse files
authored
fix datetime_to_numeric and Variable._to_numeric (#2668)
* WIP: fix regression about datetime_to_numeric
* Workaround for object array
* added a whatsnew
* rearrange tests
* lint
* Added Variable._to_numeric
* Fix for cftime
* Update via comments
* lint
* Fix via comment
* Fix errors
* lint
1 parent 6d20766 commit 4cd56a9

File tree

9 files changed

+120
-95
lines changed

9 files changed

+120
-95
lines changed

xarray/core/dataset.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,17 @@
2828
DatasetCoordinates, LevelCoordinatesSource, assert_coordinate_consistent,
2929
remap_label_indexers,
3030
)
31+
from .duck_array_ops import datetime_to_numeric
3132
from .indexes import Indexes, default_indexes, isel_variable_and_index
3233
from .merge import (
3334
dataset_merge_method, dataset_update_method, merge_data_and_coords,
3435
merge_variables)
3536
from .options import OPTIONS, _get_keep_attrs
3637
from .pycompat import dask_array_type
3738
from .utils import (
38-
Frozen, SortedKeysDict, _check_inplace, datetime_to_numeric,
39-
decode_numpy_dict_values, either_dict_or_kwargs, hashable,
40-
maybe_wrap_array)
39+
Frozen, SortedKeysDict, _check_inplace,
40+
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution,
41+
hashable, maybe_wrap_array)
4142
from .variable import IndexVariable, Variable, as_variable, broadcast_variables
4243
if TYPE_CHECKING:
4344
from .dataarray import DataArray
@@ -3997,15 +3998,14 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
39973998
datetime_unit, _ = np.datetime_data(coord_var.dtype)
39983999
elif datetime_unit is None:
39994000
datetime_unit = 's' # Default to seconds for cftime objects
4000-
coord_var = datetime_to_numeric(
4001-
coord_var, datetime_unit=datetime_unit)
4001+
coord_var = coord_var._to_numeric(datetime_unit=datetime_unit)
40024002

40034003
variables = OrderedDict()
40044004
for k, v in self.variables.items():
40054005
if (k in self.data_vars and dim in v.dims and
40064006
k not in self.coords):
40074007
if _contains_datetime_like_objects(v):
4008-
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
4008+
v = v._to_numeric(datetime_unit=datetime_unit)
40094009
grad = duck_array_ops.gradient(
40104010
v.data, coord_var, edge_order=edge_order,
40114011
axis=v.get_axis_num(dim))

xarray/core/duck_array_ops.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212
import pandas as pd
1313

14-
from . import dask_array_ops, dtypes, npcompat, nputils, utils
14+
from . import dask_array_ops, dtypes, npcompat, nputils
1515
from .nputils import nanfirst, nanlast
1616
from .pycompat import dask_array_type
1717

@@ -289,14 +289,51 @@ def f(values, axis=None, skipna=None, **kwargs):
289289
_mean = _create_nan_agg_method('mean')
290290

291291

292+
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
293+
"""Convert an array containing datetime-like data to an array of floats.
294+
295+
Parameters
296+
----------
297+
array : array
298+
Input data
299+
offset: Scalar with the same type of array or None
300+
If None, subtract minimum values to reduce round off error
301+
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
302+
'us', 'ns', 'ps', 'fs', 'as'}
303+
dtype: target dtype
304+
305+
Returns
306+
-------
307+
array
308+
"""
309+
if offset is None:
310+
offset = array.min()
311+
array = array - offset
312+
313+
if not hasattr(array, 'dtype'): # scalar is converted to 0d-array
314+
array = np.array(array)
315+
316+
if array.dtype.kind in 'O':
317+
# possibly convert object array containing datetime.timedelta
318+
array = np.asarray(pd.Series(array.ravel())).reshape(array.shape)
319+
320+
if datetime_unit:
321+
array = array / np.timedelta64(1, datetime_unit)
322+
323+
# convert np.NaT to np.nan
324+
if array.dtype.kind in 'mM':
325+
return np.where(isnull(array), np.nan, array.astype(dtype))
326+
return array.astype(dtype)
327+
328+
292329
def mean(array, axis=None, skipna=None, **kwargs):
293330
""" inhouse mean that can handle datetime dtype """
294331
array = asarray(array)
295-
if array.dtype.kind == 'M':
332+
if array.dtype.kind in 'Mm':
296333
offset = min(array)
297334
# xarray always uses datetime[ns] for datetime
298335
dtype = 'timedelta64[ns]'
299-
return _mean(utils.datetime_to_numeric(array, offset), axis=axis,
336+
return _mean(datetime_to_numeric(array, offset), axis=axis,
300337
skipna=skipna, **kwargs).astype(dtype) + offset
301338
else:
302339
return _mean(array, axis=axis, skipna=skipna, **kwargs)

xarray/core/missing.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
from . import utils
1010
from .common import _contains_datetime_like_objects
1111
from .computation import apply_ufunc
12-
from .duck_array_ops import dask_array_type
13-
from .utils import OrderedSet, datetime_to_numeric, is_scalar
12+
from .duck_array_ops import dask_array_type, datetime_to_numeric
13+
from .utils import OrderedSet, is_scalar
1414
from .variable import Variable, broadcast_variables
1515

1616

@@ -411,10 +411,9 @@ def _floatize_x(x, new_x):
411411
# We assume that most of the bits are used to represent the
412412
# offset (min(x)) and the variation (x - min(x)) can be
413413
# represented by float.
414-
xmin = x[i].min()
415-
x[i] = datetime_to_numeric(x[i], offset=xmin, dtype=np.float64)
416-
new_x[i] = datetime_to_numeric(
417-
new_x[i], offset=xmin, dtype=np.float64)
414+
xmin = x[i].values.min()
415+
x[i] = x[i]._to_numeric(offset=xmin, dtype=np.float64)
416+
new_x[i] = new_x[i]._to_numeric(offset=xmin, dtype=np.float64)
418417
return x, new_x
419418

420419

xarray/core/utils.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -603,40 +603,6 @@ def __len__(self):
603603
return len(self._data) - num_hidden
604604

605605

606-
def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
607-
"""Convert an array containing datetime-like data to an array of floats.
608-
609-
Parameters
610-
----------
611-
da : array
612-
Input data
613-
offset: Scalar with the same type of array or None
614-
If None, subtract minimum values to reduce round off error
615-
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
616-
'us', 'ns', 'ps', 'fs', 'as'}
617-
dtype: target dtype
618-
619-
Returns
620-
-------
621-
array
622-
"""
623-
from . import duck_array_ops
624-
625-
if offset is None:
626-
offset = array.min()
627-
array = array - offset
628-
629-
if datetime_unit:
630-
array = array / np.timedelta64(1, datetime_unit)
631-
# convert np.NaT to np.nan
632-
if array.dtype.kind in 'mM':
633-
if hasattr(array, 'isnull'):
634-
return np.where(array.isnull(), np.nan, array.astype(dtype))
635-
return np.where(duck_array_ops.isnull(array), np.nan,
636-
array.astype(dtype))
637-
return array
638-
639-
640606
def get_temp_dimname(dims, new_dim):
641607
""" Get a new dimension name based on new_dim, that is not used in dims.
642608
If the same name exists, we add an underscore(s) in the head.

xarray/core/variable.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,14 @@ def func(self, other):
17821782
return self
17831783
return func
17841784

1785+
def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
1786+
""" A (private) method to convert datetime array to numeric dtype
1787+
See duck_array_ops.datetime_to_numeric
1788+
"""
1789+
numeric_array = duck_array_ops.datetime_to_numeric(
1790+
self.data, offset, datetime_unit, dtype)
1791+
return type(self)(self.dims, numeric_array, self._attrs)
1792+
17851793

17861794
ops.inject_all_ops_and_reduce_methods(Variable)
17871795

xarray/tests/test_dataset.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
ALL_DIMS, DataArray, Dataset, IndexVariable, MergeError, Variable, align,
1717
backends, broadcast, open_dataset, set_options)
1818
from xarray.core import dtypes, indexing, npcompat, utils
19-
from xarray.core.common import full_like
19+
from xarray.core.common import duck_array_ops, full_like
2020
from xarray.core.pycompat import integer_types
2121

2222
from . import (
@@ -4676,7 +4676,7 @@ def test_differentiate_datetime(dask):
46764676
actual = da.differentiate('x', edge_order=1, datetime_unit='D')
46774677
expected_x = xr.DataArray(
46784678
npcompat.gradient(
4679-
da, utils.datetime_to_numeric(da['x'], datetime_unit='D'),
4679+
da, da['x'].variable._to_numeric(datetime_unit='D'),
46804680
axis=0, edge_order=1), dims=da.dims, coords=da.coords)
46814681
assert_equal(expected_x, actual)
46824682

@@ -4710,7 +4710,7 @@ def test_differentiate_cftime(dask):
47104710

47114711
actual = da.differentiate('time', edge_order=1, datetime_unit='D')
47124712
expected_data = npcompat.gradient(
4713-
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'),
4713+
da, da['time'].variable._to_numeric(datetime_unit='D'),
47144714
axis=0, edge_order=1)
47154715
expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
47164716
assert_equal(expected, actual)
@@ -4789,7 +4789,8 @@ def test_trapz_datetime(dask, which_datetime):
47894789

47904790
actual = da.integrate('time', datetime_unit='D')
47914791
expected_data = np.trapz(
4792-
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), axis=0)
4792+
da, duck_array_ops.datetime_to_numeric(da['time'], datetime_unit='D'),
4793+
axis=0)
47934794
expected = xr.DataArray(
47944795
expected_data, dims=['y'],
47954796
coords={k: v for k, v in da.coords.items() if 'time' not in v.dims})

xarray/tests/test_duck_array_ops.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,17 @@
77
import pytest
88
from numpy import array, nan
99

10-
from xarray import DataArray, Dataset, concat
10+
from xarray import DataArray, Dataset, concat, cftime_range
1111
from xarray.core import dtypes, duck_array_ops
1212
from xarray.core.duck_array_ops import (
1313
array_notnull_equiv, concatenate, count, first, gradient, last, mean,
1414
rolling_window, stack, where)
1515
from xarray.core.pycompat import dask_array_type
16-
from xarray.testing import assert_allclose, assert_equal
16+
from xarray.testing import assert_allclose, assert_equal, assert_identical
1717

1818
from . import (
19-
assert_array_equal, has_dask, has_np113, raises_regex, requires_dask)
19+
assert_array_equal, has_dask, has_np113, raises_regex, requires_cftime,
20+
requires_dask)
2021

2122

2223
class TestOps(object):
@@ -569,3 +570,42 @@ def test_docs():
569570
indicated dimension(s) removed.
570571
""")
571572
assert actual == expected
573+
574+
575+
def test_datetime_to_numeric_datetime64():
576+
times = pd.date_range('2000', periods=5, freq='7D').values
577+
result = duck_array_ops.datetime_to_numeric(times, datetime_unit='h')
578+
expected = 24 * np.arange(0, 35, 7)
579+
np.testing.assert_array_equal(result, expected)
580+
581+
offset = times[1]
582+
result = duck_array_ops.datetime_to_numeric(
583+
times, offset=offset, datetime_unit='h')
584+
expected = 24 * np.arange(-7, 28, 7)
585+
np.testing.assert_array_equal(result, expected)
586+
587+
dtype = np.float32
588+
result = duck_array_ops.datetime_to_numeric(
589+
times, datetime_unit='h', dtype=dtype)
590+
expected = 24 * np.arange(0, 35, 7).astype(dtype)
591+
np.testing.assert_array_equal(result, expected)
592+
593+
594+
@requires_cftime
595+
def test_datetime_to_numeric_cftime():
596+
times = cftime_range('2000', periods=5, freq='7D').values
597+
result = duck_array_ops.datetime_to_numeric(times, datetime_unit='h')
598+
expected = 24 * np.arange(0, 35, 7)
599+
np.testing.assert_array_equal(result, expected)
600+
601+
offset = times[1]
602+
result = duck_array_ops.datetime_to_numeric(
603+
times, offset=offset, datetime_unit='h')
604+
expected = 24 * np.arange(-7, 28, 7)
605+
np.testing.assert_array_equal(result, expected)
606+
607+
dtype = np.float32
608+
result = duck_array_ops.datetime_to_numeric(
609+
times, datetime_unit='h', dtype=dtype)
610+
expected = 24 * np.arange(0, 35, 7).astype(dtype)
611+
np.testing.assert_array_equal(result, expected)

xarray/tests/test_interp.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,3 +571,16 @@ def test_cftime_to_non_cftime_error():
571571

572572
with pytest.raises(TypeError):
573573
da.interp(time=0.5)
574+
575+
576+
@requires_scipy
577+
def test_datetime_interp_noerror():
578+
# GH:2667
579+
a = xr.DataArray(
580+
np.arange(21).reshape(3, 7), dims=['x', 'time'],
581+
coords={'x': [1, 2, 3],
582+
'time': pd.date_range('01-01-2001', periods=7, freq='D')})
583+
xi = xr.DataArray(
584+
np.linspace(1, 3, 50), dims=['time'],
585+
coords={'time': pd.date_range('01-01-2001', periods=50, freq='H')})
586+
a.interp(x=xi, time=xi.time) # should not raise an error

xarray/tests/test_utils.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -279,42 +279,3 @@ def test_either_dict_or_kwargs():
279279

280280
with pytest.raises(ValueError, match=r'foo'):
281281
result = either_dict_or_kwargs(dict(a=1), dict(a=1), 'foo')
282-
283-
284-
def test_datetime_to_numeric_datetime64():
285-
times = pd.date_range('2000', periods=5, freq='7D')
286-
da = xr.DataArray(times, coords=[times], dims=['time'])
287-
result = utils.datetime_to_numeric(da, datetime_unit='h')
288-
expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
289-
assert_identical(result, expected)
290-
291-
offset = da.isel(time=1)
292-
result = utils.datetime_to_numeric(da, offset=offset, datetime_unit='h')
293-
expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
294-
assert_identical(result, expected)
295-
296-
dtype = np.float32
297-
result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype)
298-
expected = 24 * xr.DataArray(
299-
np.arange(0, 35, 7), coords=da.coords).astype(dtype)
300-
assert_identical(result, expected)
301-
302-
303-
@requires_cftime
304-
def test_datetime_to_numeric_cftime():
305-
times = xr.cftime_range('2000', periods=5, freq='7D')
306-
da = xr.DataArray(times, coords=[times], dims=['time'])
307-
result = utils.datetime_to_numeric(da, datetime_unit='h')
308-
expected = 24 * xr.DataArray(np.arange(0, 35, 7), coords=da.coords)
309-
assert_identical(result, expected)
310-
311-
offset = da.isel(time=1)
312-
result = utils.datetime_to_numeric(da, offset=offset, datetime_unit='h')
313-
expected = 24 * xr.DataArray(np.arange(-7, 28, 7), coords=da.coords)
314-
assert_identical(result, expected)
315-
316-
dtype = np.float32
317-
result = utils.datetime_to_numeric(da, datetime_unit='h', dtype=dtype)
318-
expected = 24 * xr.DataArray(
319-
np.arange(0, 35, 7), coords=da.coords).astype(dtype)
320-
assert_identical(result, expected)

0 commit comments

Comments
 (0)