Skip to content

Address latest pandas-related upstream test failures #9081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,21 @@
from xarray.core.types import InclusiveOptions, SideOptions


def _nanosecond_precision_timestamp(*args, **kwargs):
# As of pandas version 3.0, pd.to_datetime(Timestamp(...)) will try to
# infer the appropriate datetime precision. Until xarray supports
# non-nanosecond precision times, we will use this constructor wrapper to
# explicitly create nanosecond-precision Timestamp objects.
return pd.Timestamp(*args, **kwargs).as_unit("ns")


def get_date_type(calendar, use_cftime=True):
"""Return the cftime date type for a given calendar name."""
if cftime is None:
raise ImportError("cftime is required for dates with non-standard calendars")
else:
if _is_standard_calendar(calendar) and not use_cftime:
return pd.Timestamp
return _nanosecond_precision_timestamp

calendars = {
"noleap": cftime.DatetimeNoLeap,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def test_roundtrip_string_encoded_characters(self) -> None:
assert actual["x"].encoding["_Encoding"] == "ascii"

def test_roundtrip_numpy_datetime_data(self) -> None:
times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"])
times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pandas.to_datetime will infer the precision from the input in pandas 3, so we explicitly specify the desired precision now.

expected = Dataset({"t": ("t", times), "t0": times[0]})
kwargs = {"encoding": {"t0": {"units": "days since 1950-01-01"}}}
with self.roundtrip(expected, save_kwargs=kwargs) as actual:
Expand Down
9 changes: 6 additions & 3 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,11 +538,14 @@ def test_infer_datetime_units(freq, units) -> None:
["dates", "expected"],
[
(
pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"]),
pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"], unit="ns"),
"days since 1900-01-01 00:00:00",
),
(pd.to_datetime(["NaT", "1900-01-01"]), "days since 1900-01-01 00:00:00"),
(pd.to_datetime(["NaT"]), "days since 1970-01-01 00:00:00"),
(
pd.to_datetime(["NaT", "1900-01-01"], unit="ns"),
"days since 1900-01-01 00:00:00",
),
(pd.to_datetime(["NaT"], unit="ns"), "days since 1970-01-01 00:00:00"),
],
)
def test_infer_datetime_units_with_NaT(dates, expected) -> None:
Expand Down
9 changes: 6 additions & 3 deletions xarray/tests/test_combine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from datetime import datetime
from itertools import product

import numpy as np
Expand Down Expand Up @@ -229,8 +228,12 @@ def test_lexicographic_sort_string_coords(self):
assert concat_dims == ["simulation"]

def test_datetime_coords(self):
ds0 = Dataset({"time": [datetime(2000, 3, 6), datetime(2001, 3, 7)]})
ds1 = Dataset({"time": [datetime(1999, 1, 1), datetime(1999, 2, 4)]})
ds0 = Dataset(
{"time": np.array(["2000-03-06", "2000-03-07"], dtype="datetime64[ns]")}
)
ds1 = Dataset(
{"time": np.array(["1999-01-01", "1999-02-04"], dtype="datetime64[ns]")}
)

expected = {(0,): ds1, (1,): ds0}
actual, concat_dims = _infer_concat_order_from_coords([ds0, ds1])
Expand Down
2 changes: 2 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3642,6 +3642,7 @@ def test_to_and_from_dict(
actual_no_data = da.to_dict(data=False, encoding=encoding)
assert expected_no_data == actual_no_data

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_to_and_from_dict_with_time_dim(self) -> None:
x = np.random.randn(10, 3)
t = pd.date_range("20130101", periods=10)
Expand All @@ -3650,6 +3651,7 @@ def test_to_and_from_dict_with_time_dim(self) -> None:
roundtripped = DataArray.from_dict(da.to_dict())
assert_identical(da, roundtripped)

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_to_and_from_dict_with_nan_nat(self) -> None:
y = np.random.randn(10, 3)
y[2] = np.nan
Expand Down
5 changes: 2 additions & 3 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,7 @@ def test_groupby_da_datetime() -> None:
times = pd.date_range("2000-01-01", periods=4)
foo = xr.DataArray([1, 2, 3, 4], coords=dict(time=times), dims="time")
# create test index
dd = times.to_pydatetime()
reference_dates = [dd[0], dd[2]]
reference_dates = [times[0], times[2]]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, it was not material to this test whether reference_dates started as datetime.datetime objects or as np.datetime64[ns] values. I therefore removed the conversion to datetime.datetime to avoid the conversion warning under pandas 3; previously, the times were converted back to datetime64[ns] values in the DataArray constructor anyway.

labels = reference_dates[0:1] * 2 + reference_dates[1:2] * 2
ind = xr.DataArray(
labels, coords=dict(time=times), dims="time", name="reference_date"
Expand Down Expand Up @@ -1881,7 +1880,7 @@ def test_resample_first(self) -> None:
array = Dataset({"time": times})["time"]
actual = array.resample(time="1D").last()
expected_times = pd.to_datetime(
["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"]
["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"], unit="ns"
)
expected = DataArray(expected_times, [("time", times[::4])], name="time")
assert_identical(expected, actual)
Expand Down
7 changes: 3 additions & 4 deletions xarray/tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import math
from collections.abc import Generator, Hashable
from copy import copy
from datetime import date, datetime, timedelta
from datetime import date, timedelta
from typing import Any, Callable, Literal

import numpy as np
Expand Down Expand Up @@ -2912,9 +2912,8 @@ def setUp(self) -> None:
"""
month = np.arange(1, 13, 1)
data = np.sin(2 * np.pi * month / 12.0)

darray = DataArray(data, dims=["time"])
darray.coords["time"] = np.array([datetime(2017, m, 1) for m in month])
times = pd.date_range(start="2017-01-01", freq="MS", periods=12)
darray = DataArray(data, dims=["time"], coords=[times])

self.darray = darray

Expand Down
6 changes: 4 additions & 2 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
assert_equal,
assert_identical,
assert_no_warnings,
has_pandas_3,
raise_if_dask_computes,
requires_bottleneck,
requires_cupy,
Expand Down Expand Up @@ -252,6 +253,7 @@ def test_0d_object_array_with_list(self):
assert_array_equal(x[0].data, listarray.squeeze())
assert_array_equal(x.squeeze().data, listarray.squeeze())

@pytest.mark.filterwarnings("ignore:Converting non-nanosecond")
def test_index_and_concat_datetime(self):
# regression test for #125
date_range = pd.date_range("2011-09-01", periods=10)
Expand Down Expand Up @@ -2942,8 +2944,8 @@ def test_from_pint_wrapping_dask(self, Var):
(np.array([np.datetime64("2000-01-01", "ns")]), False),
(np.array([np.datetime64("2000-01-01", "s")]), True),
(pd.date_range("2000", periods=1), False),
(datetime(2000, 1, 1), False),
(np.array([datetime(2000, 1, 1)]), False),
(datetime(2000, 1, 1), has_pandas_3),
(np.array([datetime(2000, 1, 1)]), has_pandas_3),
Comment on lines +2947 to +2948
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With pandas 3, pd.Series(datetime.datetime(...)) will produce a Series with np.datetime64[us] values instead of np.datetime64[ns] values, so this conversion now warns.

(pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), False),
(
pd.Series(
Expand Down
Loading