29
29
from xarray .core .variable import Variable
30
30
31
31
try :
32
- import cftime
32
+ import cftime_rs as cftime
33
33
except ImportError :
34
34
cftime = None
35
35
@@ -231,11 +231,21 @@ def _decode_datetime_with_cftime(
231
231
num_dates : np .ndarray , units : str , calendar : str
232
232
) -> np .ndarray :
233
233
if cftime is None :
234
- raise ModuleNotFoundError ("No module named 'cftime '" )
234
+ raise ModuleNotFoundError ("No module named 'cftime_rs '" )
235
235
if num_dates .size > 0 :
236
- return np .asarray (
237
- cftime .num2date (num_dates , units , calendar , only_use_cftime_datetimes = True )
238
- )
236
+ try :
237
+ res = cftime .num2pydate (
238
+ num_dates ,
239
+ units ,
240
+ calendar ,
241
+ )
242
+ except ValueError :
243
+ res = cftime .num2date (
244
+ num_dates ,
245
+ units ,
246
+ calendar ,
247
+ )
248
+ return np .asarray (res )
239
249
else :
240
250
return np .array ([], dtype = object )
241
251
@@ -248,7 +258,6 @@ def _decode_datetime_with_pandas(
248
258
f"Cannot decode times from a non-standard calendar, { calendar !r} , using "
249
259
"pandas."
250
260
)
251
-
252
261
time_units , ref_date = _unpack_netcdf_time_units (units )
253
262
time_units = _netcdf_to_numpy_timeunit (time_units )
254
263
try :
@@ -259,14 +268,12 @@ def _decode_datetime_with_pandas(
259
268
# ValueError is raised by pd.Timestamp for non-ISO timestamp
260
269
# strings, in which case we fall back to using cftime
261
270
raise OutOfBoundsDatetime
262
-
263
271
with warnings .catch_warnings ():
264
272
warnings .filterwarnings ("ignore" , "invalid value encountered" , RuntimeWarning )
265
273
if flat_num_dates .size > 0 :
266
274
# avoid size 0 datetimes GH1329
267
275
pd .to_timedelta (flat_num_dates .min (), time_units ) + ref_date
268
276
pd .to_timedelta (flat_num_dates .max (), time_units ) + ref_date
269
-
270
277
# To avoid integer overflow when converting to nanosecond units for integer
271
278
# dtypes smaller than np.int64 cast all integer and unsigned integer dtype
272
279
# arrays to np.int64 (GH 2002, GH 6589). Note this is safe even in the case
@@ -321,7 +328,6 @@ def decode_cf_datetime(
321
328
dates = _decode_datetime_with_cftime (
322
329
flat_num_dates .astype (float ), units , calendar
323
330
)
324
-
325
331
if (
326
332
dates [np .nanargmin (num_dates )].year < 1678
327
333
or dates [np .nanargmax (num_dates )].year >= 2262
@@ -410,7 +416,7 @@ def infer_calendar_name(dates) -> CFCalendar:
410
416
sample = sample .compute ()
411
417
if isinstance (sample , np .ndarray ):
412
418
sample = sample .item ()
413
- if isinstance (sample , cftime .datetime ):
419
+ if isinstance (sample , cftime .PyCFDatetime ):
414
420
return sample .calendar
415
421
416
422
# Raise an error if dtype is neither datetime nor "O", if cftime is not importable, and if an element of 'O' dtype is not a cftime object.
@@ -464,8 +470,10 @@ def infer_timedelta_units(deltas) -> str:
464
470
return _infer_time_units_from_diff (unique_timedeltas )
465
471
466
472
467
- def cftime_to_nptime (times , raise_on_invalid : bool = True ) -> np .ndarray :
468
- """Given an array of cftime.datetime objects, return an array of
473
+ def cftime_to_nptime (
474
+ times : list [cftime .PyCFDatetime ], raise_on_invalid : bool = True
475
+ ) -> np .ndarray :
476
+ """Given an array of cftime_rs.PyCFDatetime objects, return an array of
469
477
numpy.datetime64 objects of the same size
470
478
471
479
If raise_on_invalid is True (default), invalid dates trigger a ValueError.
@@ -480,6 +488,7 @@ def cftime_to_nptime(times, raise_on_invalid: bool = True) -> np.ndarray:
480
488
# NumPy casts it safely to np.datetime64[ns] for dates outside
481
489
# 1678 to 2262 (this is not currently the case for
482
490
# datetime.datetime).
491
+ datetime
483
492
dt = nanosecond_precision_timestamp (
484
493
t .year , t .month , t .day , t .hour , t .minute , t .second , t .microsecond
485
494
)
@@ -619,34 +628,46 @@ def _cleanup_netcdf_time_units(units: str) -> str:
619
628
return units
620
629
621
630
622
def encode_datetime_with_cftime(dates, units: str, calendar: str) -> np.ndarray:
    """Encode dates as numbers relative to ``units`` using cftime_rs.

    Parameters
    ----------
    dates : array-like
        Dates to encode. ``datetime64`` input is first converted to Python
        ``datetime`` objects (missing values become ``None``). ``None``
        entries are treated as missing.
    units : str
        Units string passed through unchanged to ``cftime.pydate2num``.
    calendar : str
        Calendar name passed through unchanged to ``cftime.pydate2num``.

    Returns
    -------
    np.ndarray
        Floating point array with the same shape as ``dates``. Positions
        that held ``None`` contain NaN.

    Raises
    ------
    ModuleNotFoundError
        If the optional ``cftime_rs`` dependency could not be imported.
    """
    if cftime is None:
        # Name the module exactly as it is imported (``import cftime_rs``).
        raise ModuleNotFoundError("No module named 'cftime_rs'")

    dates = np.asarray(dates)

    if np.issubdtype(dates.dtype, np.datetime64):
        # numpy's broken datetime conversion only works for us precision
        dates = dates.astype("M8[us]").astype(datetime)

    # Locate missing entries with an identity test instead of ``==`` so that
    # no element's ``__eq__`` is ever invoked with ``None``.
    missing = np.fromiter(
        (d is None for d in dates.ravel()), dtype=bool, count=dates.size
    ).reshape(dates.shape)
    present = ~missing

    # Start from an all-NaN array; only non-missing positions are filled in.
    result = np.full(dates.shape, np.nan)

    valid_dates = dates[present]
    if valid_dates.size == 0:
        # Nothing to encode (empty or all-missing input): skip the call.
        return result

    as_list = valid_dates.tolist()
    # Try converting to f64 first to avoid unnecessary conversion to i64
    try:
        encoded_nums = cftime.pydate2num(as_list, units, calendar, dtype="f64")
    except TypeError:
        encoded_nums = cftime.pydate2num(as_list, units, calendar, dtype="i64")

    # Boolean-mask assignment also works for 0-d input, unlike np.nonzero.
    result[present] = encoded_nums
    return result


# Keep the pre-rename private name importable.
# NOTE(review): presumably still imported elsewhere (e.g. tests) - confirm,
# and drop this alias once no caller uses the old name.
_encode_datetime_with_cftime = encode_datetime_with_cftime
650
671
651
672
652
673
def cast_to_int_if_safe (num ) -> np .ndarray :
@@ -683,7 +704,6 @@ def encode_cf_datetime(
683
704
cftime.date2num
684
705
"""
685
706
dates = np .asarray (dates )
686
-
687
707
data_units = infer_datetime_units (dates )
688
708
689
709
if units is None :
@@ -694,63 +714,10 @@ def encode_cf_datetime(
694
714
if calendar is None :
695
715
calendar = infer_calendar_name (dates )
696
716
697
- try :
698
- if not _is_standard_calendar (calendar ) or dates .dtype .kind == "O" :
699
- # parse with cftime instead
700
- raise OutOfBoundsDatetime
701
- assert dates .dtype == "datetime64[ns]"
702
-
703
- time_units , ref_date = _unpack_time_units_and_ref_date (units )
704
- time_delta = _time_units_to_timedelta64 (time_units )
705
-
706
- # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
707
- # an OverflowError is raised if the ref_date is too far away from
708
- # dates to be encoded (GH 2272).
709
- dates_as_index = pd .DatetimeIndex (dates .ravel ())
710
- time_deltas = dates_as_index - ref_date
711
-
712
- # retrieve needed units to faithfully encode to int64
713
- needed_units , data_ref_date = _unpack_time_units_and_ref_date (data_units )
714
- if data_units != units :
715
- # this accounts for differences in the reference times
716
- ref_delta = abs (data_ref_date - ref_date ).to_timedelta64 ()
717
- data_delta = _time_units_to_timedelta64 (needed_units )
718
- if (ref_delta % data_delta ) > np .timedelta64 (0 , "ns" ):
719
- needed_units = _infer_time_units_from_diff (ref_delta )
720
-
721
- # needed time delta to encode faithfully to int64
722
- needed_time_delta = _time_units_to_timedelta64 (needed_units )
723
-
724
- floor_division = True
725
- if time_delta > needed_time_delta :
726
- floor_division = False
727
- if dtype is None :
728
- emit_user_level_warning (
729
- f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
730
- f"Resolution of { needed_units !r} needed. Serializing times to floating point instead. "
731
- f"Set encoding['dtype'] to integer dtype to serialize to int64. "
732
- f"Set encoding['dtype'] to floating point dtype to silence this warning."
733
- )
734
- elif np .issubdtype (dtype , np .integer ):
735
- new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
736
- emit_user_level_warning (
737
- f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
738
- f"Serializing with units { new_units !r} instead. "
739
- f"Set encoding['dtype'] to floating point dtype to serialize with units { units !r} . "
740
- f"Set encoding['units'] to { new_units !r} to silence this warning ."
741
- )
742
- units = new_units
743
- time_delta = needed_time_delta
744
- floor_division = True
745
-
746
- num = _division (time_deltas , time_delta , floor_division )
747
- num = num .values .reshape (dates .shape )
748
-
749
- except (OutOfBoundsDatetime , OverflowError , ValueError ):
750
- num = _encode_datetime_with_cftime (dates , units , calendar )
751
- # do it now only for cftime-based flow
752
- # we already covered for this in pandas-based flow
753
- num = cast_to_int_if_safe (num )
717
+ num = encode_datetime_with_cftime (dates , units , calendar )
718
+ # encoding always goes through cftime_rs now, so apply the safe
719
+ # integer cast unconditionally
720
+ num = cast_to_int_if_safe (num )
754
721
755
722
return (num , units , calendar )
756
723
0 commit comments