diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b4ef3c4c28c..0b611e88453 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,8 @@ New Features Bug fixes ~~~~~~~~~ +- Fix bug where datetime64 times are silently changed to incorrect values if they are outside the valid date range for ns precision when provided in some other units (:issue:`4427`, :pull:`4454`). + By `Andrew Pauling <https://github.com/andrewpauling>`_. - Fix silently overwriting the `engine` key when passing :py:func:`open_dataset` a file object to an incompatible netCDF (:issue:`4457`). Now incompatible combinations of files and engines raise an exception instead. By `Alessandro Amici <https://github.com/alexamici>`_. diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index ff2113f7c14..f3a75034058 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -255,7 +255,9 @@ def map_blocks( to the function being applied in ``xr.map_blocks()``: >>> array.map_blocks( - ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array, + ... calculate_anomaly, + ... kwargs={"groupby_type": "time.year"}, + ... template=array, ... ) # doctest: +ELLIPSIS dask.array diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c55e61cb816..f4ced459a3a 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -177,7 +177,9 @@ def _maybe_wrap_data(data): def _possibly_convert_objects(values): """Convert arrays of datetime.datetime and datetime.timedelta objects into - datetime64 and timedelta64, according to the pandas convention. + datetime64 and timedelta64, according to the pandas convention. Also used for + validating that datetime64 and timedelta64 objects are within the valid date + range for ns precision, as pandas will raise an error if they are not. 
""" return np.asarray(pd.Series(values.ravel())).reshape(values.shape) @@ -238,16 +240,16 @@ def as_compatible_data(data, fastpath=False): '"1"' ) - # validate whether the data is valid data types + # validate whether the data is valid data types. data = np.asarray(data) if isinstance(data, np.ndarray): if data.dtype.kind == "O": data = _possibly_convert_objects(data) elif data.dtype.kind == "M": - data = np.asarray(data, "datetime64[ns]") + data = _possibly_convert_objects(data) elif data.dtype.kind == "m": - data = np.asarray(data, "timedelta64[ns]") + data = _possibly_convert_objects(data) return _maybe_wrap_data(data) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index efebe09e2ec..08fe0739760 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -294,6 +294,19 @@ def test_object_conversion(self): actual = self.cls("x", data) assert actual.dtype == data.dtype + def test_datetime64_valid_range(self): + data = np.datetime64("1250-01-01", "us") + pderror = pd.errors.OutOfBoundsDatetime + with raises_regex(pderror, "Out of bounds nanosecond"): + self.cls(["t"], [data]) + + @pytest.mark.xfail(reason="pandas issue 36615") + def test_timedelta64_valid_range(self): + data = np.timedelta64("200000", "D") + pderror = pd.errors.OutOfBoundsTimedelta + with raises_regex(pderror, "Out of bounds nanosecond"): + self.cls(["t"], [data]) + def test_pandas_data(self): v = self.cls(["x"], pd.Series([0, 1, 2], index=[3, 2, 1])) assert_identical(v, v[[0, 1, 2]])