Skip to content

fix datetime_to_numeric and Variable._to_numeric #2668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 11, 2019
Merged
12 changes: 6 additions & 6 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,17 @@
DatasetCoordinates, LevelCoordinatesSource, assert_coordinate_consistent,
remap_label_indexers,
)
from .duck_array_ops import datetime_to_numeric
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Within differentiate, we use datetime_to_numeric on Variable objects; I think you should switch to the new Variable-specific method there.

from .indexes import Indexes, default_indexes, isel_variable_and_index
from .merge import (
dataset_merge_method, dataset_update_method, merge_data_and_coords,
merge_variables)
from .options import OPTIONS, _get_keep_attrs
from .pycompat import dask_array_type
from .utils import (
Frozen, SortedKeysDict, _check_inplace, datetime_to_numeric,
decode_numpy_dict_values, either_dict_or_kwargs, hashable,
maybe_wrap_array)
Frozen, SortedKeysDict, _check_inplace,
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution,
hashable, maybe_wrap_array)
from .variable import IndexVariable, Variable, as_variable, broadcast_variables
if TYPE_CHECKING:
from .dataarray import DataArray
Expand Down Expand Up @@ -3997,15 +3998,14 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
datetime_unit, _ = np.datetime_data(coord_var.dtype)
elif datetime_unit is None:
datetime_unit = 's' # Default to seconds for cftime objects
coord_var = datetime_to_numeric(
coord_var, datetime_unit=datetime_unit)
coord_var = coord_var._to_numeric(datetime_unit=datetime_unit)

variables = OrderedDict()
for k, v in self.variables.items():
if (k in self.data_vars and dim in v.dims and
k not in self.coords):
if _contains_datetime_like_objects(v):
v = datetime_to_numeric(v, datetime_unit=datetime_unit)
v = v._to_numeric(datetime_unit=datetime_unit)
grad = duck_array_ops.gradient(
v.data, coord_var, edge_order=edge_order,
axis=v.get_axis_num(dim))
Expand Down
43 changes: 40 additions & 3 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy as np
import pandas as pd

from . import dask_array_ops, dtypes, npcompat, nputils, utils
from . import dask_array_ops, dtypes, npcompat, nputils
from .nputils import nanfirst, nanlast
from .pycompat import dask_array_type

Expand Down Expand Up @@ -289,14 +289,51 @@ def f(values, axis=None, skipna=None, **kwargs):
_mean = _create_nan_agg_method('mean')


def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
    """Convert an array containing datetime-like data to an array of floats.

    Parameters
    ----------
    array : array
        Input data
    offset : scalar of the same type as the elements of ``array``, or None
        Value subtracted from ``array`` before the conversion. If None,
        the minimum of ``array`` is subtracted to reduce round off error.
    datetime_unit : None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
        'us', 'ns', 'ps', 'fs', 'as'}
        If given, the offset-subtracted data is divided by one of this unit.
    dtype : target dtype

    Returns
    -------
    array
    """
    if offset is None:
        offset = array.min()
        # The minimum of datetime-like data can itself be NaT when the data
        # contains missing values; subtracting NaT would turn every element
        # into NaN. Fall back to the minimum of the valid entries, if any.
        if (isinstance(array, np.ndarray) and array.dtype.kind in 'mM'
                and pd.isnull(offset)):
            valid = array[~pd.isnull(array)]
            if valid.size:
                offset = valid.min()
    array = array - offset

    if not hasattr(array, 'dtype'):  # scalar is converted to 0d-array
        array = np.array(array)

    if array.dtype.kind == 'O':
        # possibly convert object array containing datetime.timedelta
        array = np.asarray(pd.Series(array.ravel())).reshape(array.shape)

    if datetime_unit:
        array = array / np.timedelta64(1, datetime_unit)

    # convert np.NaT to np.nan
    if array.dtype.kind in 'mM':
        return np.where(isnull(array), np.nan, array.astype(dtype))
    return array.astype(dtype)


def mean(array, axis=None, skipna=None, **kwargs):
""" inhouse mean that can handle datatime dtype """
array = asarray(array)
if array.dtype.kind == 'M':
if array.dtype.kind in 'Mm':
offset = min(array)
# xarray always uses datetime[ns] for datetime
dtype = 'timedelta64[ns]'
return _mean(utils.datetime_to_numeric(array, offset), axis=axis,
return _mean(datetime_to_numeric(array, offset), axis=axis,
skipna=skipna, **kwargs).astype(dtype) + offset
else:
return _mean(array, axis=axis, skipna=skipna, **kwargs)
Expand Down
11 changes: 5 additions & 6 deletions xarray/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from . import utils
from .common import _contains_datetime_like_objects
from .computation import apply_ufunc
from .duck_array_ops import dask_array_type
from .utils import OrderedSet, datetime_to_numeric, is_scalar
from .duck_array_ops import dask_array_type, datetime_to_numeric
from .utils import OrderedSet, is_scalar
from .variable import Variable, broadcast_variables


Expand Down Expand Up @@ -411,10 +411,9 @@ def _floatize_x(x, new_x):
# We assume that the most of the bits are used to represent the
# offset (min(x)) and the variation (x - min(x)) can be
# represented by float.
xmin = x[i].min()
x[i] = datetime_to_numeric(x[i], offset=xmin, dtype=np.float64)
new_x[i] = datetime_to_numeric(
new_x[i], offset=xmin, dtype=np.float64)
xmin = x[i].values.min()
x[i] = x[i]._to_numeric(offset=xmin, dtype=np.float64)
new_x[i] = new_x[i]._to_numeric(offset=xmin, dtype=np.float64)
return x, new_x


Expand Down
34 changes: 0 additions & 34 deletions xarray/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,40 +603,6 @@ def __len__(self):
return len(self._data) - num_hidden


def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
    """Convert an array containing datetime-like data to an array of floats.

    Parameters
    ----------
    array : array
        Input data
    offset : scalar of the same type as the elements of ``array``, or None
        Value subtracted from ``array`` before the conversion. If None,
        the minimum of ``array`` is subtracted to reduce round off error.
    datetime_unit : None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
        'us', 'ns', 'ps', 'fs', 'as'}
        If given, the offset-subtracted data is divided by one of this unit.
    dtype : target dtype
        Applied on every return path, including after the unit division.

    Returns
    -------
    array
    """
    if offset is None:
        offset = array.min()
    array = array - offset

    if datetime_unit:
        array = array / np.timedelta64(1, datetime_unit)

    if array.dtype.kind in 'mM':
        # convert np.NaT to np.nan
        if hasattr(array, 'isnull'):
            missing = array.isnull()
        else:
            # Imported only where it is needed and kept inside the function
            # rather than at module level.
            from . import duck_array_ops
            missing = duck_array_ops.isnull(array)
        return np.where(missing, np.nan, array.astype(dtype))
    # Honor the requested dtype here as well; previously it was ignored
    # whenever the data was no longer datetime-like at this point.
    return array.astype(dtype)


def get_temp_dimname(dims, new_dim):
""" Get an new dimension name based on new_dim, that is not used in dims.
If the same name exists, we add an underscore(s) in the head.
Expand Down
8 changes: 8 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1782,6 +1782,14 @@ def func(self, other):
return self
return func

def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
    """ Private helper returning a new object of the same type whose data
    is this object's datetime-like data converted to a numeric dtype.

    The conversion is delegated to duck_array_ops.datetime_to_numeric;
    dimensions and attributes are passed on unchanged.
    """
    converted = duck_array_ops.datetime_to_numeric(
        self.data, offset=offset, datetime_unit=datetime_unit, dtype=dtype)
    cls = type(self)
    return cls(self.dims, converted, self._attrs)


ops.inject_all_ops_and_reduce_methods(Variable)

Expand Down
9 changes: 5 additions & 4 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ALL_DIMS, DataArray, Dataset, IndexVariable, MergeError, Variable, align,
backends, broadcast, open_dataset, set_options)
from xarray.core import dtypes, indexing, npcompat, utils
from xarray.core.common import full_like
from xarray.core.common import duck_array_ops, full_like
from xarray.core.pycompat import integer_types

from . import (
Expand Down Expand Up @@ -4676,7 +4676,7 @@ def test_differentiate_datetime(dask):
actual = da.differentiate('x', edge_order=1, datetime_unit='D')
expected_x = xr.DataArray(
npcompat.gradient(
da, utils.datetime_to_numeric(da['x'], datetime_unit='D'),
da, da['x'].variable._to_numeric(datetime_unit='D'),
axis=0, edge_order=1), dims=da.dims, coords=da.coords)
assert_equal(expected_x, actual)

Expand Down Expand Up @@ -4710,7 +4710,7 @@ def test_differentiate_cftime(dask):

actual = da.differentiate('time', edge_order=1, datetime_unit='D')
expected_data = npcompat.gradient(
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'),
da, da['time'].variable._to_numeric(datetime_unit='D'),
axis=0, edge_order=1)
expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims)
assert_equal(expected, actual)
Expand Down Expand Up @@ -4789,7 +4789,8 @@ def test_trapz_datetime(dask, which_datetime):

actual = da.integrate('time', datetime_unit='D')
expected_data = np.trapz(
da, utils.datetime_to_numeric(da['time'], datetime_unit='D'), axis=0)
da, duck_array_ops.datetime_to_numeric(da['time'], datetime_unit='D'),
axis=0)
expected = xr.DataArray(
expected_data, dims=['y'],
coords={k: v for k, v in da.coords.items() if 'time' not in v.dims})
Expand Down
46 changes: 43 additions & 3 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
import pytest
from numpy import array, nan

from xarray import DataArray, Dataset, concat
from xarray import DataArray, Dataset, concat, cftime_range
from xarray.core import dtypes, duck_array_ops
from xarray.core.duck_array_ops import (
array_notnull_equiv, concatenate, count, first, gradient, last, mean,
rolling_window, stack, where)
from xarray.core.pycompat import dask_array_type
from xarray.testing import assert_allclose, assert_equal
from xarray.testing import assert_allclose, assert_equal, assert_identical

from . import (
assert_array_equal, has_dask, has_np113, raises_regex, requires_dask)
assert_array_equal, has_dask, has_np113, raises_regex, requires_cftime,
requires_dask)


class TestOps(object):
Expand Down Expand Up @@ -569,3 +570,42 @@ def test_docs():
indicated dimension(s) removed.
""")
assert actual == expected


def test_datetime_to_numeric_datetime64():
    """Weekly datetime64 values convert to the expected number of hours."""
    convert = duck_array_ops.datetime_to_numeric
    weekly = pd.date_range('2000', periods=5, freq='7D').values

    # Default offset: the minimum is subtracted, so the result starts at 0.
    np.testing.assert_array_equal(
        convert(weekly, datetime_unit='h'),
        24 * np.arange(0, 35, 7))

    # Explicit offset: values before the offset come out negative.
    second = weekly[1]
    np.testing.assert_array_equal(
        convert(weekly, offset=second, datetime_unit='h'),
        24 * np.arange(-7, 28, 7))

    # Explicit target dtype.
    target = np.float32
    np.testing.assert_array_equal(
        convert(weekly, datetime_unit='h', dtype=target),
        24 * np.arange(0, 35, 7).astype(target))


@requires_cftime
def test_datetime_to_numeric_cftime():
    """Weekly cftime values convert to the expected number of hours."""
    convert = duck_array_ops.datetime_to_numeric
    weekly = cftime_range('2000', periods=5, freq='7D').values

    # Default offset: the minimum is subtracted, so the result starts at 0.
    np.testing.assert_array_equal(
        convert(weekly, datetime_unit='h'),
        24 * np.arange(0, 35, 7))

    # Explicit offset: values before the offset come out negative.
    second = weekly[1]
    np.testing.assert_array_equal(
        convert(weekly, offset=second, datetime_unit='h'),
        24 * np.arange(-7, 28, 7))

    # Explicit target dtype.
    target = np.float32
    np.testing.assert_array_equal(
        convert(weekly, datetime_unit='h', dtype=target),
        24 * np.arange(0, 35, 7).astype(target))
13 changes: 13 additions & 0 deletions xarray/tests/test_interp.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,16 @@ def test_cftime_to_non_cftime_error():

with pytest.raises(TypeError):
da.interp(time=0.5)


@requires_scipy
def test_datetime_interp_noerror():
    """Interpolating onto a datetime coordinate must not raise (GH:2667)."""
    # GH:2667
    daily = pd.date_range('01-01-2001', periods=7, freq='D')
    hourly = pd.date_range('01-01-2001', periods=50, freq='H')

    a = xr.DataArray(
        np.arange(21).reshape(3, 7),
        dims=['x', 'time'],
        coords={'x': [1, 2, 3], 'time': daily})
    xi = xr.DataArray(
        np.linspace(1, 3, 50),
        dims=['time'],
        coords={'time': hourly})

    # should not raise an error
    a.interp(x=xi, time=xi.time)
39 changes: 0 additions & 39 deletions xarray/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,42 +279,3 @@ def test_either_dict_or_kwargs():

with pytest.raises(ValueError, match=r'foo'):
result = either_dict_or_kwargs(dict(a=1), dict(a=1), 'foo')


def test_datetime_to_numeric_datetime64():
    """Weekly datetime64 DataArray converts to the expected hour counts."""
    index = pd.date_range('2000', periods=5, freq='7D')
    da = xr.DataArray(index, coords=[index], dims=['time'])

    def weekly_days(start, stop):
        # Day counts in steps of one week, on the same coordinates as ``da``.
        return xr.DataArray(np.arange(start, stop, 7), coords=da.coords)

    # Default offset: the minimum is subtracted.
    assert_identical(
        utils.datetime_to_numeric(da, datetime_unit='h'),
        24 * weekly_days(0, 35))

    # Explicit offset taken from the second element.
    second = da.isel(time=1)
    assert_identical(
        utils.datetime_to_numeric(da, offset=second, datetime_unit='h'),
        24 * weekly_days(-7, 28))

    # Explicit target dtype.
    target = np.float32
    assert_identical(
        utils.datetime_to_numeric(da, datetime_unit='h', dtype=target),
        24 * weekly_days(0, 35).astype(target))


@requires_cftime
def test_datetime_to_numeric_cftime():
    """Weekly cftime DataArray converts to the expected hour counts."""
    index = xr.cftime_range('2000', periods=5, freq='7D')
    da = xr.DataArray(index, coords=[index], dims=['time'])

    def weekly_days(start, stop):
        # Day counts in steps of one week, on the same coordinates as ``da``.
        return xr.DataArray(np.arange(start, stop, 7), coords=da.coords)

    # Default offset: the minimum is subtracted.
    assert_identical(
        utils.datetime_to_numeric(da, datetime_unit='h'),
        24 * weekly_days(0, 35))

    # Explicit offset taken from the second element.
    second = da.isel(time=1)
    assert_identical(
        utils.datetime_to_numeric(da, offset=second, datetime_unit='h'),
        24 * weekly_days(-7, 28))

    # Explicit target dtype.
    target = np.float32
    assert_identical(
        utils.datetime_to_numeric(da, datetime_unit='h', dtype=target),
        24 * weekly_days(0, 35).astype(target))