From 891fd6ef75b5f7f91d0ed808137cf1ce7c79dc2a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 9 Jun 2024 12:25:55 -0400 Subject: [PATCH 1/7] Address pandas-related upstream test failures --- xarray/coding/cftime_offsets.py | 10 +++++++++- xarray/tests/test_coding_times.py | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 0af75f404a2..c2712569782 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -77,13 +77,21 @@ from xarray.core.types import InclusiveOptions, SideOptions +def _nanosecond_precision_timestamp(*args, **kwargs): + # As of pandas version 3.0, pd.to_datetime(Timestamp(...)) will try to + # infer the appropriate datetime precision. Until xarray supports + # non-nanosecond precision times, we will use this constructor wrapper to + # explicitly create nanosecond-precision Timestamp objects. + return pd.Timestamp(*args, **kwargs).as_unit("ns") + + def get_date_type(calendar, use_cftime=True): """Return the cftime date type for a given calendar name.""" if cftime is None: raise ImportError("cftime is required for dates with non-standard calendars") else: if _is_standard_calendar(calendar) and not use_cftime: - return pd.Timestamp + return _nanosecond_precision_timestamp calendars = { "noleap": cftime.DatetimeNoLeap, diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 9a5589ff872..09221d66066 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -538,11 +538,14 @@ def test_infer_datetime_units(freq, units) -> None: ["dates", "expected"], [ ( - pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"]), + pd.to_datetime(["1900-01-01", "1900-01-02", "NaT"], unit="ns"), "days since 1900-01-01 00:00:00", ), - (pd.to_datetime(["NaT", "1900-01-01"]), "days since 1900-01-01 00:00:00"), - (pd.to_datetime(["NaT"]), "days since 1970-01-01 00:00:00"), + ( + pd.to_datetime(["NaT", "1900-01-01"], unit="ns"), + "days since 1900-01-01 00:00:00", + ), + (pd.to_datetime(["NaT"], unit="ns"), "days since 1970-01-01 00:00:00"), ], ) def test_infer_datetime_units_with_NaT(dates, expected) -> None: From bd875d3039eb333d798cd8690934fa4c7e0c6adf Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 9 Jun 2024 13:28:20 -0400 Subject: [PATCH 2/7] Address more warnings --- xarray/tests/test_backends.py | 2 +- xarray/tests/test_dataarray.py | 8 ++++++-- xarray/tests/test_groupby.py | 5 ++--- xarray/tests/test_plot.py | 7 +++---- xarray/tests/test_variable.py | 9 ++++++--- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 33039dee7b0..177700a5404 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -529,7 +529,7 @@ def test_roundtrip_string_encoded_characters(self) -> None: assert actual["x"].encoding["_Encoding"] == "ascii" def test_roundtrip_numpy_datetime_data(self) -> None: - times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"]) + times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns") expected = Dataset({"t": ("t", times), "t0": times[0]}) kwargs = {"encoding": {"t0": {"units": "days since 1950-01-01"}}} with self.roundtrip(expected, save_kwargs=kwargs) as actual: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4e916d62155..45c7dc80a30 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3647,7 +3647,9 @@ def test_to_and_from_dict_with_time_dim(self) -> None: t = pd.date_range("20130101", periods=10) lat = [77.7, 83.2, 76] da = DataArray(x, {"t": t, "lat": lat}, dims=["t", "lat"]) - roundtripped = DataArray.from_dict(da.to_dict()) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="Converting non-nanosecond") + roundtripped = DataArray.from_dict(da.to_dict()) assert_identical(da, roundtripped) def test_to_and_from_dict_with_nan_nat(self) -> None: @@ -3657,7 +3659,9 @@ def test_to_and_from_dict_with_nan_nat(self) -> None: t[2] = np.nan lat = [77.7, 83.2, 76] da = DataArray(y, {"t": t, "lat": lat}, dims=["t", "lat"]) - roundtripped = DataArray.from_dict(da.to_dict()) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="Converting non-nanosecond") + roundtripped = DataArray.from_dict(da.to_dict()) assert_identical(da, roundtripped) def test_to_dict_with_numpy_attrs(self) -> None: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 7134fe96d01..a18b18f930c 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -139,8 +139,7 @@ def test_groupby_da_datetime() -> None: times = pd.date_range("2000-01-01", periods=4) foo = xr.DataArray([1, 2, 3, 4], coords=dict(time=times), dims="time") # create test index - dd = times.to_pydatetime() - reference_dates = [dd[0], dd[2]] + reference_dates = [times[0], times[2]] labels = reference_dates[0:1] * 2 + reference_dates[1:2] * 2 ind = xr.DataArray( labels, coords=dict(time=times), dims="time", name="reference_date" @@ -1881,7 +1880,7 @@ def test_resample_first(self) -> None: array = Dataset({"time": times})["time"] actual = array.resample(time="1D").last() expected_times = pd.to_datetime( - ["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"] + ["2000-01-01T18", "2000-01-02T18", "2000-01-03T06"], unit="ns" ) expected = DataArray(expected_times, [("time", times[::4])], name="time") assert_identical(expected, actual) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 27f4ded5646..21dc12d6f38 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -5,7 +5,7 @@ import math from collections.abc import Generator, Hashable from copy import copy -from datetime import date, datetime, timedelta +from datetime import date, timedelta from typing import Any, Callable, Literal import numpy as np @@ -2912,9 +2912,8 @@ def setUp(self) -> None: """ month = np.arange(1, 13, 1) data = np.sin(2 * np.pi * month / 12.0) - - darray = DataArray(data, dims=["time"]) - darray.coords["time"] = np.array([datetime(2017, m, 1) for m in month]) + times = pd.date_range(start="2017-01-01", freq="ME", periods=12) + darray = DataArray(data, dims=["time"], coords=[times]) self.darray = darray diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 3167de2e2f0..120e0ceef96 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -40,6 +40,7 @@ requires_bottleneck, requires_cupy, requires_dask, + requires_pandas_3, requires_pint, requires_sparse, source_ndarray, @@ -256,7 +257,9 @@ def test_index_and_concat_datetime(self): # regression test for #125 date_range = pd.date_range("2011-09-01", periods=10) for dates in [date_range, date_range.values, date_range.to_pydatetime()]: - expected = self.cls("t", dates) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="Converting non-nanosecond") + expected = self.cls("t", dates) for times in [ [expected[i] for i in range(10)], [expected[i : (i + 1)] for i in range(10)], @@ -2942,8 +2945,8 @@ def test_from_pint_wrapping_dask(self, Var): (np.array([np.datetime64("2000-01-01", "ns")]), False), (np.array([np.datetime64("2000-01-01", "s")]), True), (pd.date_range("2000", periods=1), False), - (datetime(2000, 1, 1), False), - (np.array([datetime(2000, 1, 1)]), False), + pytest.param(datetime(2000, 1, 1), True, marks=requires_pandas_3), + pytest.param(np.array([datetime(2000, 1, 1)]), True, marks=requires_pandas_3), (pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), False), ( pd.Series( From 616c1795a579cf23c42df5dbc79aaa69de998a91 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 9 Jun 2024 13:48:50 -0400 Subject: [PATCH 3/7] Don't lose coverage for pandas < 3 --- xarray/tests/test_variable.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 120e0ceef96..c5452067950 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -36,11 +36,11 @@ assert_equal, assert_identical, assert_no_warnings, + has_pandas_3, raise_if_dask_computes, requires_bottleneck, requires_cupy, requires_dask, - requires_pandas_3, requires_pint, requires_sparse, source_ndarray, @@ -2945,8 +2945,8 @@ def test_from_pint_wrapping_dask(self, Var): (np.array([np.datetime64("2000-01-01", "ns")]), False), (np.array([np.datetime64("2000-01-01", "s")]), True), (pd.date_range("2000", periods=1), False), - pytest.param(datetime(2000, 1, 1), True, marks=requires_pandas_3), - pytest.param(np.array([datetime(2000, 1, 1)]), True, marks=requires_pandas_3), + (datetime(2000, 1, 1), has_pandas_3), + (np.array([datetime(2000, 1, 1)]), has_pandas_3), (pd.date_range("2000", periods=1, tz=pytz.timezone("America/New_York")), False), ( pd.Series( From 1a3bdf6d04f58d032f9e60742e53f75860371e3f Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 9 Jun 2024 14:08:32 -0400 Subject: [PATCH 4/7] Address one more warning --- xarray/tests/test_combine.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index ea1659e4539..f239304702f 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -1,6 +1,5 @@ from __future__ import annotations -from datetime import datetime from itertools import product import numpy as np @@ -229,8 +228,22 @@ def test_lexicographic_sort_string_coords(self): assert concat_dims == ["simulation"] def test_datetime_coords(self): - ds0 = Dataset({"time": [datetime(2000, 3, 6), datetime(2001, 3, 7)]}) - ds1 = Dataset({"time": [datetime(1999, 1, 1), datetime(1999, 2, 4)]}) + ds0 = Dataset( + { + "time": [ + np.datetime64("2000-03-06", "ns"), + np.datetime64("2000-03-07", "ns"), + ] + } + ) + ds1 = Dataset( + { + "time": [ + np.datetime64("1999-01-01", "ns"), + np.datetime64("1999-02-04", "ns"), + ] + } + ) expected = {(0,): ds1, (1,): ds0} actual, concat_dims = _infer_concat_order_from_coords([ds0, ds1]) From 334d118d5d5203ff31cc42c493f39cc34f9fd79f Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 9 Jun 2024 14:28:04 -0400 Subject: [PATCH 5/7] Fix accidental change from MS to ME --- xarray/tests/test_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 21dc12d6f38..a44b621a981 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -2912,7 +2912,7 @@ def setUp(self) -> None: """ month = np.arange(1, 13, 1) data = np.sin(2 * np.pi * month / 12.0) - times = pd.date_range(start="2017-01-01", freq="ME", periods=12) + times = pd.date_range(start="2017-01-01", freq="MS", periods=12) darray = DataArray(data, dims=["time"], coords=[times]) self.darray = darray From 36a005a2bc6bd4aab2024135253005269ae818ed Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 10 Jun 2024 07:39:56 -0400 Subject: [PATCH 6/7] Use datetime64[ns] arrays --- xarray/tests/test_combine.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index f239304702f..aad7103c112 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -229,20 +229,10 @@ def test_lexicographic_sort_string_coords(self): def test_datetime_coords(self): ds0 = Dataset( - { - "time": [ - np.datetime64("2000-03-06", "ns"), - np.datetime64("2000-03-07", "ns"), - ] - } + {"time": np.array(["2000-03-06", "2000-03-07"], dtype="datetime64[ns]")} ) ds1 = Dataset( - { - "time": [ - np.datetime64("1999-01-01", "ns"), - np.datetime64("1999-02-04", "ns"), - ] - } + {"time": np.array(["1999-01-01", "1999-02-04"], dtype="datetime64[ns]")} ) expected = {(0,): ds1, (1,): ds0} From 85c95a105e160cf23cf63c578c6944a937dc7859 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 10 Jun 2024 07:52:05 -0400 Subject: [PATCH 7/7] Switch to @pytest.mark.filterwarnings --- xarray/tests/test_dataarray.py | 10 ++++------ xarray/tests/test_variable.py | 5 ++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 45c7dc80a30..86179df3b8f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3642,16 +3642,16 @@ def test_to_and_from_dict( actual_no_data = da.to_dict(data=False, encoding=encoding) assert expected_no_data == actual_no_data + @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") def test_to_and_from_dict_with_time_dim(self) -> None: x = np.random.randn(10, 3) t = pd.date_range("20130101", periods=10) lat = [77.7, 83.2, 76] da = DataArray(x, {"t": t, "lat": lat}, dims=["t", "lat"]) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Converting non-nanosecond") - roundtripped = DataArray.from_dict(da.to_dict()) + roundtripped = DataArray.from_dict(da.to_dict()) assert_identical(da, roundtripped) + @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") def test_to_and_from_dict_with_nan_nat(self) -> None: y = np.random.randn(10, 3) y[2] = np.nan @@ -3659,9 +3659,7 @@ def test_to_and_from_dict_with_nan_nat(self) -> None: t[2] = np.nan lat = [77.7, 83.2, 76] da = DataArray(y, {"t": t, "lat": lat}, dims=["t", "lat"]) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Converting non-nanosecond") - roundtripped = DataArray.from_dict(da.to_dict()) + roundtripped = DataArray.from_dict(da.to_dict()) assert_identical(da, roundtripped) def test_to_dict_with_numpy_attrs(self) -> None: diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c5452067950..081bf09484a 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -253,13 +253,12 @@ def test_0d_object_array_with_list(self): assert_array_equal(x[0].data, listarray.squeeze()) assert_array_equal(x.squeeze().data, listarray.squeeze()) + @pytest.mark.filterwarnings("ignore:Converting non-nanosecond") def test_index_and_concat_datetime(self): # regression test for #125 date_range = pd.date_range("2011-09-01", periods=10) for dates in [date_range, date_range.values, date_range.to_pydatetime()]: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Converting non-nanosecond") - expected = self.cls("t", dates) + expected = self.cls("t", dates) for times in [ [expected[i] for i in range(10)], [expected[i : (i + 1)] for i in range(10)],