Skip to content

Commit 8423f2c

Browse files
committed
feat: First attempt at replacing cftime with cftime-rs
1 parent dafd726 commit 8423f2c

File tree

3 files changed

+138
-165
lines changed

3 files changed

+138
-165
lines changed

ci/requirements/environment.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- hdf5
1919
- hypothesis
2020
- iris
21-
- lxml # Optional dep of pydap
21+
- lxml # Optional dep of pydap
2222
- matplotlib-base
2323
- nc-time-axis
2424
- netcdf4
@@ -46,3 +46,5 @@ dependencies:
4646
- toolz
4747
- typing_extensions
4848
- zarr
49+
- pip:
50+
- cftime_rs

xarray/coding/times.py

Lines changed: 53 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from xarray.core.variable import Variable
3030

3131
try:
32-
import cftime
32+
import cftime_rs as cftime
3333
except ImportError:
3434
cftime = None
3535

@@ -231,11 +231,21 @@ def _decode_datetime_with_cftime(
231231
num_dates: np.ndarray, units: str, calendar: str
232232
) -> np.ndarray:
233233
if cftime is None:
234-
raise ModuleNotFoundError("No module named 'cftime'")
234+
raise ModuleNotFoundError("No module named 'cftime_rs'")
235235
if num_dates.size > 0:
236-
return np.asarray(
237-
cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True)
238-
)
236+
try:
237+
res = cftime.num2pydate(
238+
num_dates,
239+
units,
240+
calendar,
241+
)
242+
except ValueError:
243+
res = cftime.num2date(
244+
num_dates,
245+
units,
246+
calendar,
247+
)
248+
return np.asarray(res)
239249
else:
240250
return np.array([], dtype=object)
241251

@@ -248,7 +258,6 @@ def _decode_datetime_with_pandas(
248258
f"Cannot decode times from a non-standard calendar, {calendar!r}, using "
249259
"pandas."
250260
)
251-
252261
time_units, ref_date = _unpack_netcdf_time_units(units)
253262
time_units = _netcdf_to_numpy_timeunit(time_units)
254263
try:
@@ -259,14 +268,12 @@ def _decode_datetime_with_pandas(
259268
# ValueError is raised by pd.Timestamp for non-ISO timestamp
260269
# strings, in which case we fall back to using cftime
261270
raise OutOfBoundsDatetime
262-
263271
with warnings.catch_warnings():
264272
warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning)
265273
if flat_num_dates.size > 0:
266274
# avoid size 0 datetimes GH1329
267275
pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date
268276
pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date
269-
270277
# To avoid integer overflow when converting to nanosecond units for integer
271278
# dtypes smaller than np.int64 cast all integer and unsigned integer dtype
272279
# arrays to np.int64 (GH 2002, GH 6589). Note this is safe even in the case
@@ -321,7 +328,6 @@ def decode_cf_datetime(
321328
dates = _decode_datetime_with_cftime(
322329
flat_num_dates.astype(float), units, calendar
323330
)
324-
325331
if (
326332
dates[np.nanargmin(num_dates)].year < 1678
327333
or dates[np.nanargmax(num_dates)].year >= 2262
@@ -410,7 +416,7 @@ def infer_calendar_name(dates) -> CFCalendar:
410416
sample = sample.compute()
411417
if isinstance(sample, np.ndarray):
412418
sample = sample.item()
413-
if isinstance(sample, cftime.datetime):
419+
if isinstance(sample, cftime.PyCFDatetime):
414420
return sample.calendar
415421

416422
# Error raised if the dtype is neither datetime nor "O", if cftime is not importable, and if the element of 'O' dtype is not a cftime object.
@@ -464,8 +470,10 @@ def infer_timedelta_units(deltas) -> str:
464470
return _infer_time_units_from_diff(unique_timedeltas)
465471

466472

467-
def cftime_to_nptime(times, raise_on_invalid: bool = True) -> np.ndarray:
468-
"""Given an array of cftime.datetime objects, return an array of
473+
def cftime_to_nptime(
474+
times: list[cftime.PyCFDatetime], raise_on_invalid: bool = True
475+
) -> np.ndarray:
476+
"""Given an array of cftime_rs.PyCFDatetime objects, return an array of
469477
numpy.datetime64 objects of the same size
470478
471479
If raise_on_invalid is True (default), invalid dates trigger a ValueError.
@@ -480,6 +488,7 @@ def cftime_to_nptime(times, raise_on_invalid: bool = True) -> np.ndarray:
480488
# NumPy casts it safely to np.datetime64[ns] for dates outside
481489
# 1678 to 2262 (this is not currently the case for
482490
# datetime.datetime).
491+
datetime
483492
dt = nanosecond_precision_timestamp(
484493
t.year, t.month, t.day, t.hour, t.minute, t.second, t.microsecond
485494
)
@@ -619,34 +628,46 @@ def _cleanup_netcdf_time_units(units: str) -> str:
619628
return units
620629

621630

622-
def _encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray:
631+
def encode_datetime_with_cftime(
632+
dates, units: str, calendar: str
633+
) -> np.ndarray[int | float]:
623634
"""Fallback method for encoding dates using cftime.
624635
625636
This method is more flexible than xarray's parsing using datetime64[ns]
626637
arrays but also slower because it loops over each element.
627638
"""
639+
628640
if cftime is None:
629-
raise ModuleNotFoundError("No module named 'cftime'")
641+
raise ModuleNotFoundError("No module named 'cftime-rs'")
642+
643+
dates = np.array(dates)
630644

631645
if np.issubdtype(dates.dtype, np.datetime64):
632646
# numpy's broken datetime conversion only works for us precision
633647
dates = dates.astype("M8[us]").astype(datetime)
634648

635-
def encode_datetime(d):
636-
# Since netCDF files do not support storing float128 values, we ensure
637-
# that float64 values are used by setting longdouble=False in num2date.
638-
# This try except logic can be removed when xarray's minimum version of
639-
# cftime is at least 1.6.2.
640-
try:
641-
return (
642-
np.nan
643-
if d is None
644-
else cftime.date2num(d, units, calendar, longdouble=False)
645-
)
646-
except TypeError:
647-
return np.nan if d is None else cftime.date2num(d, units, calendar)
649+
# Find the positions of all None entries (the comparison below matches None only)
650+
none_position = np.equal(dates, None)
648651

649-
return np.array([encode_datetime(d) for d in dates.ravel()]).reshape(dates.shape)
652+
# Remove the None entries from the dates
653+
filtered_dates = dates[~none_position]
654+
print(filtered_dates)
655+
# encoded_nums will be the same size as filtered_dates
656+
# Try converting to f64 first to avoid unnecessary conversion to i64
657+
try:
658+
encoded_nums = cftime.pydate2num(
659+
filtered_dates.tolist(), units, calendar, dtype="f64"
660+
)
661+
except TypeError:
662+
encoded_nums = cftime.pydate2num(
663+
filtered_dates.tolist(), units, calendar, dtype="i64"
664+
)
665+
666+
# Create an array filled with NaN, with the same shape as dates
667+
# And fill in the encoded numbers at the positions that were not None
668+
result = np.full(dates.shape, np.nan)
669+
result[np.nonzero(~none_position)] = encoded_nums
670+
return result
650671

651672

652673
def cast_to_int_if_safe(num) -> np.ndarray:
@@ -683,7 +704,6 @@ def encode_cf_datetime(
683704
cftime.date2num
684705
"""
685706
dates = np.asarray(dates)
686-
687707
data_units = infer_datetime_units(dates)
688708

689709
if units is None:
@@ -694,63 +714,10 @@ def encode_cf_datetime(
694714
if calendar is None:
695715
calendar = infer_calendar_name(dates)
696716

697-
try:
698-
if not _is_standard_calendar(calendar) or dates.dtype.kind == "O":
699-
# parse with cftime instead
700-
raise OutOfBoundsDatetime
701-
assert dates.dtype == "datetime64[ns]"
702-
703-
time_units, ref_date = _unpack_time_units_and_ref_date(units)
704-
time_delta = _time_units_to_timedelta64(time_units)
705-
706-
# Wrap the dates in a DatetimeIndex to do the subtraction to ensure
707-
# an OverflowError is raised if the ref_date is too far away from
708-
# dates to be encoded (GH 2272).
709-
dates_as_index = pd.DatetimeIndex(dates.ravel())
710-
time_deltas = dates_as_index - ref_date
711-
712-
# retrieve needed units to faithfully encode to int64
713-
needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units)
714-
if data_units != units:
715-
# this accounts for differences in the reference times
716-
ref_delta = abs(data_ref_date - ref_date).to_timedelta64()
717-
data_delta = _time_units_to_timedelta64(needed_units)
718-
if (ref_delta % data_delta) > np.timedelta64(0, "ns"):
719-
needed_units = _infer_time_units_from_diff(ref_delta)
720-
721-
# needed time delta to encode faithfully to int64
722-
needed_time_delta = _time_units_to_timedelta64(needed_units)
723-
724-
floor_division = True
725-
if time_delta > needed_time_delta:
726-
floor_division = False
727-
if dtype is None:
728-
emit_user_level_warning(
729-
f"Times can't be serialized faithfully to int64 with requested units {units!r}. "
730-
f"Resolution of {needed_units!r} needed. Serializing times to floating point instead. "
731-
f"Set encoding['dtype'] to integer dtype to serialize to int64. "
732-
f"Set encoding['dtype'] to floating point dtype to silence this warning."
733-
)
734-
elif np.issubdtype(dtype, np.integer):
735-
new_units = f"{needed_units} since {format_timestamp(ref_date)}"
736-
emit_user_level_warning(
737-
f"Times can't be serialized faithfully to int64 with requested units {units!r}. "
738-
f"Serializing with units {new_units!r} instead. "
739-
f"Set encoding['dtype'] to floating point dtype to serialize with units {units!r}. "
740-
f"Set encoding['units'] to {new_units!r} to silence this warning ."
741-
)
742-
units = new_units
743-
time_delta = needed_time_delta
744-
floor_division = True
745-
746-
num = _division(time_deltas, time_delta, floor_division)
747-
num = num.values.reshape(dates.shape)
748-
749-
except (OutOfBoundsDatetime, OverflowError, ValueError):
750-
num = _encode_datetime_with_cftime(dates, units, calendar)
751-
# do it now only for cftime-based flow
752-
# we already covered for this in pandas-based flow
753-
num = cast_to_int_if_safe(num)
717+
num = encode_datetime_with_cftime(dates, units, calendar)
718+
# do it now only for cftime-based flow
719+
# we already covered for this in pandas-based flow
720+
num = cast_to_int_if_safe(num)
754721

755722
return (num, units, calendar)
756723

0 commit comments

Comments
 (0)