Skip to content

Add support for CFTimeIndex in get_clean_interp_index #3631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Jan 26, 2020
Merged
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
d5c2242
add support for CFTimeIndex in get_clean_interp_index
huard Dec 16, 2019
77bb24c
black
huard Dec 16, 2019
03e7769
added test comparing cftime index with standard index
huard Dec 16, 2019
e169cf4
added comment
huard Dec 16, 2019
303020f
index in ns instead of days
huard Dec 16, 2019
210fb94
pep8
huard Dec 16, 2019
1cfe72d
datetime_to_numeric: convert timedelta objects using np.timedelta64 t…
huard Dec 18, 2019
4964163
added interp test
huard Dec 18, 2019
83f6c89
switched clean_interp_index resolution to us. Fixed interpolate_na an…
huard Dec 18, 2019
6298953
Error message to explain overflow problem.
huard Dec 18, 2019
3d23ccf
Merge branch 'fix-3641' into cf_interp_index
huard Dec 19, 2019
2ba1803
switched timedelta64 units from ms to us
huard Dec 20, 2019
9a648d9
Merge branch 'fix-3641' into cf_interp_index
huard Dec 20, 2019
e873da2
reverted default user-visible resolution to ns. Converts to float, po…
huard Jan 6, 2020
532756d
pep8
huard Jan 6, 2020
73d8729
black
huard Jan 6, 2020
4288780
special case for older numpy versions
huard Jan 6, 2020
077145e
black
huard Jan 6, 2020
758d81c
added xfail for overflow error with numpy < 1.17
huard Jan 6, 2020
d0d8bfe
changes following PR comments from spencerclark
huard Jan 14, 2020
6c9630a
bypass pandas to convert timedeltas to floats. avoids overflow errors.
huard Jan 17, 2020
d18c775
black
huard Jan 17, 2020
78e17ec
Merge branch 'master' into cf_interp_index
huard Jan 17, 2020
6615c97
removed numpy conversion. added docstrings. renamed tests.
huard Jan 20, 2020
2df2b29
pep8
huard Jan 20, 2020
31f5417
updated whats new
huard Jan 20, 2020
2974af9
Update doc/whats-new.rst
huard Jan 20, 2020
eeb5074
update interpolate_na docstrings
huard Jan 20, 2020
6b9631f
black
huard Jan 20, 2020
5656fdb
dt conflicts with accessor
huard Jan 20, 2020
dcf98ff
replaced assert_equal by assert_allclose
huard Jan 24, 2020
4842a96
Update xarray/core/duck_array_ops.py
huard Jan 25, 2020
6dbf225
Update xarray/core/duck_array_ops.py
huard Jan 25, 2020
c90dc97
renamed array to value in timedelta_to_numeric. Added tests
huard Jan 25, 2020
71fb87d
removed support for TimedeltaIndex in timedelta_to_numeric
huard Jan 25, 2020
3d9f333
added tests for np_timedelta64_to_float and pd_timedelta_to_float. re…
huard Jan 26, 2020
b04785c
black
huard Jan 26, 2020
d24cae4
Fix flake8 error
spencerkclark Jan 26, 2020
6f0c504
black
spencerkclark Jan 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
@@ -23,8 +23,8 @@ Breaking changes
~~~~~~~~~~~~~~~~

- Remove ``compat`` and ``encoding`` kwargs from ``DataArray``, which
have been deprecated since 0.12. (:pull:`3650`).
Instead, specify the encoding when writing to disk or set
have been deprecated since 0.12. (:pull:`3650`).
Instead, specify the encoding when writing to disk or set
the ``encoding`` attribute directly.
By `Maximilian Roos <https://github.com/max-sixty>`_

@@ -48,10 +48,15 @@ New Features
- :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`
now allow swapping to dimension names that don't exist yet. (:pull:`3636`)
By `Justus Magin <https://github.com/keewis>`_.
- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties
and support `.dt` accessor for timedelta
- Extend :py:class:`core.accessor_dt.DatetimeAccessor` properties
and support `.dt` accessor for timedelta
via :py:class:`core.accessor_dt.TimedeltaAccessor` (:pull:`3612`)
By `Anderson Banihirwe <https://github.com/andersy005>`_.
- Support CFTimeIndex in :py:meth:`DataArray.interpolate_na`, define 1970-01-01
as the default offset for the interpolation index for both DatetimeIndex and
CFTimeIndex, use microseconds in the conversion from timedelta objects
to floats to avoid overflow errors (:issue:`3641`, :pull:`3631`).
By `David Huard <https://github.com/huard>`_.

Bug fixes
~~~~~~~~~
9 changes: 8 additions & 1 deletion xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
@@ -430,7 +430,14 @@ def __sub__(self, other):
import cftime

if isinstance(other, (CFTimeIndex, cftime.datetime)):
return pd.TimedeltaIndex(np.array(self) - np.array(other))
try:
return pd.TimedeltaIndex(np.array(self) - np.array(other))
except OverflowError:
raise ValueError(
"The time difference exceeds the range of values "
"that can be expressed at the nanosecond resolution."
)

elif isinstance(other, pd.TimedeltaIndex):
return CFTimeIndex(np.array(self) - other.to_pytimedelta())
else:
8 changes: 6 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
cast,
)

import datetime
import numpy as np
import pandas as pd

@@ -2041,7 +2042,9 @@ def interpolate_na(
method: str = "linear",
limit: int = None,
use_coordinate: Union[bool, str] = True,
max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None,
max_gap: Union[
int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta
] = None,
**kwargs: Any,
) -> "DataArray":
"""Fill in NaNs by interpolating according to different methods.
@@ -2073,14 +2076,15 @@ def interpolate_na(
or None for no limit. This filling is done regardless of the size of
the gap in the data. To only interpolate over gaps less than a given length,
see ``max_gap``.
max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None.
max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default None.
Maximum size of gap, a continuous sequence of NaNs, that will be filled.
Use None for no limit. When interpolating along a datetime64 dimension
and ``use_coordinate=True``, ``max_gap`` can be one of the following:

- a string that is valid input for pandas.to_timedelta
- a :py:class:`numpy.timedelta64` object
- a :py:class:`pandas.Timedelta` object
- a :py:class:`datetime.timedelta` object

Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled
dimensions has not been implemented yet. Gap length is defined as the difference
8 changes: 6 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@
cast,
)

import datetime
import numpy as np
import pandas as pd

@@ -3994,7 +3995,9 @@ def interpolate_na(
method: str = "linear",
limit: int = None,
use_coordinate: Union[bool, Hashable] = True,
max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None,
max_gap: Union[
int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta
] = None,
**kwargs: Any,
) -> "Dataset":
"""Fill in NaNs by interpolating according to different methods.
@@ -4027,14 +4030,15 @@ def interpolate_na(
or None for no limit. This filling is done regardless of the size of
the gap in the data. To only interpolate over gaps less than a given length,
see ``max_gap``.
max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, default None.
max_gap: int, float, str, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, default None.
Maximum size of gap, a continuous sequence of NaNs, that will be filled.
Use None for no limit. When interpolating along a datetime64 dimension
and ``use_coordinate=True``, ``max_gap`` can be one of the following:

- a string that is valid input for pandas.to_timedelta
- a :py:class:`numpy.timedelta64` object
- a :py:class:`pandas.Timedelta` object
- a :py:class:`datetime.timedelta` object

Otherwise, ``max_gap`` must be an int or a float. Use of ``max_gap`` with unlabeled
dimensions has not been implemented yet. Gap length is defined as the difference
122 changes: 106 additions & 16 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
@@ -372,51 +372,141 @@ def _datetime_nanmin(array):


def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
"""Convert an array containing datetime-like data to an array of floats.
"""Convert an array containing datetime-like data to numerical values.

Convert the datetime array to a timedelta relative to an offset.

Parameters
----------
da : np.array
Input data
offset: Scalar with the same type of array or None
If None, subtract minimum values to reduce round off error
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
'us', 'ns', 'ps', 'fs', 'as'}
dtype: target dtype
da : array-like
Input data
offset: None, datetime or cftime.datetime
Datetime offset. If None, this is set by default to the array's minimum
value to reduce round off errors.
datetime_unit: {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as}
If not None, convert output to a given datetime unit. Note that some
conversions are not allowed due to non-linear relationships between units.
dtype: dtype
Output dtype.

Returns
-------
array
Numerical representation of datetime object relative to an offset.

Notes
-----
Some datetime unit conversions won't work, for example from days to years, even
though some calendars would allow for them (e.g. no_leap). This is because there
is no `cftime.timedelta` object.
"""
# TODO: make this function dask-compatible?
# Set offset to minimum if not given
if offset is None:
if array.dtype.kind in "Mm":
offset = _datetime_nanmin(array)
else:
offset = min(array)

# Compute timedelta object.
# For np.datetime64, this can silently yield garbage due to overflow.
# One option is to enforce 1970-01-01 as the universal offset.
array = array - offset

if not hasattr(array, "dtype"): # scalar is converted to 0d-array
# Scalar is converted to 0d-array
if not hasattr(array, "dtype"):
array = np.array(array)

# Convert timedelta objects to float by first converting to microseconds.
if array.dtype.kind in "O":
# possibly convert object array containing datetime.timedelta
array = np.asarray(pd.Series(array.ravel())).reshape(array.shape)
return py_timedelta_to_float(array, datetime_unit or "ns").astype(dtype)

if datetime_unit:
array = array / np.timedelta64(1, datetime_unit)
# Convert np.NaT to np.nan
elif array.dtype.kind in "mM":

# convert np.NaT to np.nan
if array.dtype.kind in "mM":
# Convert to specified timedelta units.
if datetime_unit:
array = array / np.timedelta64(1, datetime_unit)
return np.where(isnull(array), np.nan, array.astype(dtype))
return array.astype(dtype)


def timedelta_to_numeric(value, datetime_unit="ns", dtype=float):
    """Convert a timedelta-like object to numerical values.

    Parameters
    ----------
    value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, str
        Time delta representation.
    datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as}
        The time units of the output values. Note that some conversions are not allowed due to
        non-linear relationships between units.
    dtype : type
        The output data type.

    Returns
    -------
    Numerical value of the time delta, cast to ``dtype``.

    Raises
    ------
    ValueError
        If a string cannot be parsed by ``pandas.to_timedelta``.
    TypeError
        If ``value`` is of an unsupported type.
    """
    import datetime as dt

    # pandas.Timedelta subclasses datetime.timedelta, so it must be checked
    # first; otherwise the generic datetime.timedelta branch would shadow it
    # and the exact nanosecond-based conversion would never be reached.
    if isinstance(value, pd.Timedelta):
        out = pd_timedelta_to_float(value, datetime_unit)
    elif isinstance(value, dt.timedelta):
        out = py_timedelta_to_float(value, datetime_unit)
    elif isinstance(value, np.timedelta64):
        out = np_timedelta64_to_float(value, datetime_unit)
    elif isinstance(value, str):
        try:
            parsed = pd.to_timedelta(value)
        except ValueError:
            raise ValueError(
                f"Could not convert {value!r} to timedelta64 using pandas.to_timedelta"
            )
        # Fall through to the shared astype below (the original returned early
        # here, silently ignoring ``dtype`` for string inputs).
        out = pd_timedelta_to_float(parsed, datetime_unit)
    else:
        raise TypeError(
            f"Expected value of type str, pandas.Timedelta, datetime.timedelta "
            f"or numpy.timedelta64, but received {type(value).__name__}"
        )
    return out.astype(dtype)


def _to_pytimedelta(array, unit="us"):
index = pd.TimedeltaIndex(array.ravel(), unit=unit)
return index.to_pytimedelta().reshape(array.shape)


def np_timedelta64_to_float(array, datetime_unit):
    """Convert numpy.timedelta64 to float.

    Parameters
    ----------
    array : numpy.timedelta64 scalar or ndarray of timedelta64
        Value(s) to convert.
    datetime_unit : str
        Target time unit for the output values (e.g. "ns", "us", "s").

    Notes
    -----
    The array is first converted to nanoseconds (numpy's default timedelta
    resolution) and then rescaled to ``datetime_unit``. The original docstring
    said "microseconds", which did not match the ``timedelta64[ns]`` cast below.
    """
    array = array.astype("timedelta64[ns]").astype(np.float64)
    # Rescale from nanoseconds to the requested unit via a timedelta ratio.
    conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit)
    return conversion_factor * array


def pd_timedelta_to_float(value, datetime_unit):
    """Convert pandas.Timedelta to float.

    Notes
    -----
    Built on the assumption that pandas timedelta values are in nanoseconds,
    which is also the numpy default resolution.
    """
    # Go through timedelta64 so the exact nanosecond payload is preserved,
    # then rescale to the requested unit.
    td64 = value.to_timedelta64()
    nanoseconds = td64.astype("timedelta64[ns]").astype(np.float64)
    factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit)
    return factor * nanoseconds


def py_timedelta_to_float(array, datetime_unit):
    """Convert a timedelta object to a float, possibly at a loss of resolution."""
    as_array = np.asarray(array)
    # total_seconds() * 1e6 expresses each element in microseconds.
    seconds = [td.total_seconds() for td in as_array.ravel()]
    microseconds = np.reshape(seconds, as_array.shape) * 1e6
    # Rescale from microseconds to the requested unit via a timedelta ratio.
    factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit)
    return factor * microseconds


def mean(array, axis=None, skipna=None, **kwargs):
"""inhouse mean that can handle np.datetime64 or cftime.datetime
dtypes"""
139 changes: 78 additions & 61 deletions xarray/core/missing.py
Original file line number Diff line number Diff line change
@@ -2,14 +2,15 @@
from functools import partial
from numbers import Number
from typing import Any, Callable, Dict, Hashable, Sequence, Union
import datetime as dt

import numpy as np
import pandas as pd

from . import utils
from .common import _contains_datetime_like_objects, ones_like
from .computation import apply_ufunc
from .duck_array_ops import dask_array_type
from .duck_array_ops import dask_array_type, datetime_to_numeric, timedelta_to_numeric
from .utils import OrderedSet, is_scalar
from .variable import Variable, broadcast_variables

@@ -207,52 +208,81 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs):


def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True):
"""get index to use for x values in interpolation.
"""Return index to use for x values in interpolation or curve fitting.
If use_coordinate is True, the coordinate that shares the name of the
dimension along which interpolation is being performed will be used as the
x values.
Parameters
----------
arr : DataArray
Array to interpolate or fit to a curve.
dim : str
Name of dimension along which to fit.
use_coordinate : str or bool
If use_coordinate is True, the coordinate that shares the name of the
dimension along which interpolation is being performed will be used as the
x values. If False, the x values are set as an equally spaced sequence.
Returns
-------
Variable
Numerical values for the x-coordinates.
If use_coordinate is False, the x values are set as an equally spaced
sequence.
Notes
-----
If indexing is along the time dimension, datetime coordinates are converted
to time deltas with respect to 1970-01-01.
"""
if use_coordinate:
if use_coordinate is True:
index = arr.get_index(dim)
else:
index = arr.coords[use_coordinate]
if index.ndim != 1:
raise ValueError(
f"Coordinates used for interpolation must be 1D, "
f"{use_coordinate} is {index.ndim}D."
)
index = index.to_index()

# TODO: index.name is None for multiindexes
# set name for nice error messages below
if isinstance(index, pd.MultiIndex):
index.name = dim

if not index.is_monotonic:
raise ValueError(f"Index {index.name!r} must be monotonically increasing")

if not index.is_unique:
raise ValueError(f"Index {index.name!r} has duplicate values")

# raise if index cannot be cast to a float (e.g. MultiIndex)
try:
index = index.values.astype(np.float64)
except (TypeError, ValueError):
# pandas raises a TypeError
# xarray/numpy raise a ValueError
raise TypeError(
f"Index {index.name!r} must be castable to float64 to support "
f"interpolation, got {type(index).__name__}."
)

else:
# Question: If use_coordinate is a string, what role does `dim` play?
from xarray.coding.cftimeindex import CFTimeIndex

if use_coordinate is False:
axis = arr.get_axis_num(dim)
index = np.arange(arr.shape[axis], dtype=np.float64)
return np.arange(arr.shape[axis], dtype=np.float64)

if use_coordinate is True:
index = arr.get_index(dim)

else: # string
index = arr.coords[use_coordinate]
if index.ndim != 1:
raise ValueError(
f"Coordinates used for interpolation must be 1D, "
f"{use_coordinate} is {index.ndim}D."
)
index = index.to_index()

# TODO: index.name is None for multiindexes
# set name for nice error messages below
if isinstance(index, pd.MultiIndex):
index.name = dim

if not index.is_monotonic:
raise ValueError(f"Index {index.name!r} must be monotonically increasing")

if not index.is_unique:
raise ValueError(f"Index {index.name!r} has duplicate values")

# Special case for non-standard calendar indexes
# Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds
if isinstance(index, (CFTimeIndex, pd.DatetimeIndex)):
offset = type(index[0])(1970, 1, 1)
if isinstance(index, CFTimeIndex):
index = index.values
index = Variable(
data=datetime_to_numeric(index, offset=offset, datetime_unit="ns"),
dims=(dim,),
)

# raise if index cannot be cast to a float (e.g. MultiIndex)
try:
index = index.values.astype(np.float64)
except (TypeError, ValueError):
# pandas raises a TypeError
# xarray/numpy raise a ValueError
raise TypeError(
f"Index {index.name!r} must be castable to float64 to support "
f"interpolation, got {type(index).__name__}."
)

return index

@@ -263,11 +293,13 @@ def interp_na(
use_coordinate: Union[bool, str] = True,
method: str = "linear",
limit: int = None,
max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64] = None,
max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None,
**kwargs,
):
"""Interpolate values according to different methods.
"""
from xarray.coding.cftimeindex import CFTimeIndex

if dim is None:
raise NotImplementedError("dim is a required argument")

@@ -281,26 +313,11 @@ def interp_na(

if (
dim in self.indexes
and isinstance(self.indexes[dim], pd.DatetimeIndex)
and isinstance(self.indexes[dim], (pd.DatetimeIndex, CFTimeIndex))
and use_coordinate
):
if not isinstance(max_gap, (np.timedelta64, pd.Timedelta, str)):
raise TypeError(
f"Underlying index is DatetimeIndex. Expected max_gap of type str, pandas.Timedelta or numpy.timedelta64 but received {max_type}"
)

if isinstance(max_gap, str):
try:
max_gap = pd.to_timedelta(max_gap)
except ValueError:
raise ValueError(
f"Could not convert {max_gap!r} to timedelta64 using pandas.to_timedelta"
)

if isinstance(max_gap, pd.Timedelta):
max_gap = np.timedelta64(max_gap.value, "ns")

max_gap = np.timedelta64(max_gap, "ns").astype(np.float64)
# Convert to float
max_gap = timedelta_to_numeric(max_gap)

if not use_coordinate:
if not isinstance(max_gap, (Number, np.number)):
80 changes: 77 additions & 3 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings
from textwrap import dedent

import datetime as dt
import numpy as np
import pandas as pd
import pytest
@@ -19,6 +20,10 @@
rolling_window,
stack,
where,
py_timedelta_to_float,
np_timedelta64_to_float,
pd_timedelta_to_float,
timedelta_to_numeric,
)
from xarray.core.pycompat import dask_array_type
from xarray.testing import assert_allclose, assert_equal
@@ -672,17 +677,86 @@ def test_datetime_to_numeric_datetime64():

@requires_cftime
def test_datetime_to_numeric_cftime():
times = cftime_range("2000", periods=5, freq="7D").values
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h")
times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int)
expected = 24 * np.arange(0, 35, 7)
np.testing.assert_array_equal(result, expected)

offset = times[1]
result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h")
result = duck_array_ops.datetime_to_numeric(
times, offset=offset, datetime_unit="h", dtype=int
)
expected = 24 * np.arange(-7, 28, 7)
np.testing.assert_array_equal(result, expected)

dtype = np.float32
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype)
expected = 24 * np.arange(0, 35, 7).astype(dtype)
np.testing.assert_array_equal(result, expected)


@requires_cftime
def test_datetime_to_numeric_potential_overflow():
    """Conversions relative to a year-1 offset must not overflow (GH3641).

    The same dates are run through both code paths — a numpy datetime64[us]
    array and a cftime array — and both must yield the same day counts.
    """
    import cftime

    times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]")
    cftimes = cftime_range(
        "2000", periods=5, freq="7D", calendar="proleptic_gregorian"
    ).values

    offset = np.datetime64("0001-01-01")
    cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1)

    result = duck_array_ops.datetime_to_numeric(
        times, offset=offset, datetime_unit="D", dtype=int
    )
    cfresult = duck_array_ops.datetime_to_numeric(
        cftimes, offset=cfoffset, datetime_unit="D", dtype=int
    )

    # 730119 days from 0001-01-01 to 2000-01-01 (proleptic Gregorian);
    # subsequent points are 7 days apart.
    expected = 730119 + np.arange(0, 35, 7)

    np.testing.assert_array_equal(result, expected)
    np.testing.assert_array_equal(cfresult, expected)


def test_py_timedelta_to_float():
    """py_timedelta_to_float converts datetime.timedelta to the requested unit."""
    assert py_timedelta_to_float(dt.timedelta(days=1), "ns") == 86400 * 1e9

    # One million days, converted to each supported unit in turn.
    million_days = dt.timedelta(days=1e6)
    expected_by_unit = {
        "ps": 86400 * 1e18,
        "ns": 86400 * 1e15,
        "us": 86400 * 1e12,
        "ms": 86400 * 1e9,
        "s": 86400 * 1e6,
        "D": 1e6,
    }
    for unit, expected in expected_by_unit.items():
        assert py_timedelta_to_float(million_days, unit) == expected


@pytest.mark.parametrize(
    "td, expected",
    ([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]),
)
def test_np_timedelta64_to_float(td, expected):
    """Scalar and 1-d inputs both convert to nanosecond floats."""
    scalar_result = np_timedelta64_to_float(td, datetime_unit="ns")
    np.testing.assert_allclose(scalar_result, expected)
    assert isinstance(scalar_result, float)

    array_result = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns")
    np.testing.assert_allclose(array_result, expected)


@pytest.mark.parametrize(
    "td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0])
)
def test_pd_timedelta_to_float(td, expected):
    """pandas.Timedelta converts to a plain float in nanoseconds."""
    result = pd_timedelta_to_float(td, datetime_unit="ns")
    np.testing.assert_allclose(result, expected)
    assert isinstance(result, float)


@pytest.mark.parametrize(
    "td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"],
)
def test_timedelta_to_numeric(td):
    """Every supported scalar representation of one day converts to 86400e9 ns."""
    result = timedelta_to_numeric(td, "ns")
    np.testing.assert_allclose(result, 86400 * 1e9)
    assert isinstance(result, float)
7 changes: 7 additions & 0 deletions xarray/tests/test_interp.py
Original file line number Diff line number Diff line change
@@ -662,3 +662,10 @@ def test_datetime_interp_noerror():
coords={"time": pd.date_range("01-01-2001", periods=50, freq="H")},
)
a.interp(x=xi, time=xi.time) # should not raise an error


@requires_cftime
def test_3641():
    # Regression test for GH-3641: interpolating along a multi-century cftime
    # index should simply not raise (previously overflowed at ns resolution).
    times = xr.cftime_range("0001", periods=3, freq="500Y")
    da = xr.DataArray(range(3), dims=["time"], coords=[times])
    da.interp(time=["0002-05-01"])
61 changes: 55 additions & 6 deletions xarray/tests/test_missing.py
Original file line number Diff line number Diff line change
@@ -16,18 +16,34 @@
from xarray.tests import (
assert_array_equal,
assert_equal,
assert_allclose,
raises_regex,
requires_bottleneck,
requires_dask,
requires_scipy,
requires_cftime,
)

from xarray.tests.test_cftime_offsets import _CFTIME_CALENDARS


@pytest.fixture
def da():
    # 1-D time series with interior NaN gaps, shared by interpolate_na tests.
    return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time")


@pytest.fixture
def cf_da():
    """Factory fixture: build a 10-point DataArray indexed by a CFTimeIndex.

    Returns a callable taking a calendar name and an optional frequency, so
    each test can pick the calendar under test.
    """
    def _cf_da(calendar, freq="1D"):
        times = xr.cftime_range(
            start="1970-01-01", freq=freq, periods=10, calendar=calendar
        )
        values = np.arange(10)
        return xr.DataArray(values, dims=("time",), coords={"time": times})

    return _cf_da


@pytest.fixture
def ds():
ds = xr.Dataset()
@@ -472,6 +488,42 @@ def test_interpolate_na_nan_block_lengths(y, lengths):
assert_equal(actual, expected)


@requires_cftime
@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
def test_get_clean_interp_index_cf_calendar(cf_da, calendar):
    """The index for CFTimeIndex is in nanoseconds (one day = 86400e9), i.e.
    proportional to the number of days elapsed. This means that if two series using
    a 360 and 365 days calendar each have a trend of .01C/year, the linear
    regression coefficients will be different because they have different number of days.
    Another option would be to have an index in units of years, but this would likely create other difficulties.
    """
    i = get_clean_interp_index(cf_da(calendar), dim="time")
    np.testing.assert_array_equal(i, np.arange(10) * 1e9 * 86400)


@requires_cftime
@pytest.mark.parametrize(
    ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"])
)
def test_get_clean_interp_index_dt(cf_da, calendar, freq):
    """In the gregorian case, the index should be proportional to normal datetimes."""
    g = cf_da(calendar, freq=freq)
    # Mirror the cftime coordinate as a true DatetimeIndex coordinate so the
    # two index computations can be compared element-wise.
    g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",))

    gi = get_clean_interp_index(g, "time")
    si = get_clean_interp_index(g, "time", use_coordinate="stime")
    np.testing.assert_array_equal(gi, si)


def test_get_clean_interp_index_potential_overflow():
    # Dates around year 0 are outside the range representable by numpy's
    # datetime64[ns]; building the interpolation index must still succeed
    # without raising (no expected value asserted — absence of error is the test).
    da = xr.DataArray(
        [0, 1, 2],
        dims=("time",),
        coords={"time": xr.cftime_range("0000-01-01", periods=3, calendar="360_day")},
    )
    get_clean_interp_index(da, "time")


@pytest.fixture
def da_time():
return xr.DataArray(
@@ -490,7 +542,7 @@ def test_interpolate_na_max_gap_errors(da_time):
da_time.interpolate_na("t", max_gap=(1,))

da_time["t"] = pd.date_range("2001-01-01", freq="H", periods=11)
with raises_regex(TypeError, "Underlying index is"):
with raises_regex(TypeError, "Expected value of type str"):
da_time.interpolate_na("t", max_gap=1)

with raises_regex(TypeError, "Expected integer or floating point"):
@@ -501,10 +553,7 @@ def test_interpolate_na_max_gap_errors(da_time):


@requires_bottleneck
@pytest.mark.parametrize(
"time_range_func",
[pd.date_range, pytest.param(xr.cftime_range, marks=pytest.mark.xfail)],
)
@pytest.mark.parametrize("time_range_func", [pd.date_range, xr.cftime_range])
@pytest.mark.parametrize("transform", [lambda x: x, lambda x: x.to_dataset(name="a")])
@pytest.mark.parametrize(
"max_gap", ["3H", np.timedelta64(3, "h"), pd.to_timedelta("3H")]
@@ -517,7 +566,7 @@ def test_interpolate_na_max_gap_time_specifier(
da_time.copy(data=[np.nan, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan, 10])
)
actual = transform(da_time).interpolate_na("t", max_gap=max_gap)
assert_equal(actual, expected)
assert_allclose(actual, expected)


@requires_bottleneck