Skip to content

Commit a74a605

Browse files
oliverhiggs, pre-commit-ci[bot], dcherian, spencerkclark
authored
Support additional dtypes in resample (#9413)
* Support additional dtypes to resample pandas.BaseOffset, pandas.Timedelta, datetime.timedelta, and BaseCFTimeOffset are now all supported datatypes for resampling. * Update whats-new * Fix types * Add unit test * Fix test * Support more dtypes for CFTimeIndex resampling * Tidy resample type hints * Fix some mypy bugs * Fixes * Fix tests * WIP * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update doc/whats-new.rst * Apply suggestions from code review Co-authored-by: Spencer Clark <[email protected]> * Fix mypy error * Fix bad edit --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <[email protected]> Co-authored-by: Spencer Clark <[email protected]>
1 parent 68b040a commit a74a605

File tree

9 files changed

+168
-33
lines changed

9 files changed

+168
-33
lines changed

doc/whats-new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ Bug fixes
7878
- Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray``
7979
in NumPy 2.0 (:issue:`9312`, :pull:`9393`)
8080
By `Andrew Scherer <https://github.com/andrew-s28>`_.
81+
- Fix support for using ``pandas.DateOffset``, ``pandas.Timedelta``, and
82+
``datetime.timedelta`` objects as ``resample`` frequencies
83+
(:issue:`9408`, :pull:`9413`).
84+
By `Oliver Higgs <https://github.com/oliverhiggs>`_.
8185

8286
Performance
8387
~~~~~~~~~~~

xarray/coding/cftime_offsets.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from collections.abc import Mapping
4848
from datetime import datetime, timedelta
4949
from functools import partial
50-
from typing import TYPE_CHECKING, ClassVar, Literal
50+
from typing import TYPE_CHECKING, ClassVar, Literal, TypeVar
5151

5252
import numpy as np
5353
import pandas as pd
@@ -80,6 +80,7 @@
8080

8181

8282
DayOption: TypeAlias = Literal["start", "end"]
83+
T_FreqStr = TypeVar("T_FreqStr", str, None)
8384

8485

8586
def _nanosecond_precision_timestamp(*args, **kwargs):
@@ -772,11 +773,18 @@ def _emit_freq_deprecation_warning(deprecated_freq):
772773
emit_user_level_warning(message, FutureWarning)
773774

774775

775-
def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffset:
776+
def to_offset(
777+
freq: BaseCFTimeOffset | str | timedelta | pd.Timedelta | pd.DateOffset,
778+
warn: bool = True,
779+
) -> BaseCFTimeOffset:
776780
"""Convert a frequency string to the appropriate subclass of
777781
BaseCFTimeOffset."""
778782
if isinstance(freq, BaseCFTimeOffset):
779783
return freq
784+
if isinstance(freq, timedelta | pd.Timedelta):
785+
return delta_to_tick(freq)
786+
if isinstance(freq, pd.DateOffset):
787+
freq = _legacy_to_new_freq(freq.freqstr)
780788

781789
match = re.match(_PATTERN, freq)
782790
if match is None:
@@ -791,6 +799,34 @@ def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffs
791799
return _FREQUENCIES[freq](n=multiples)
792800

793801

802+
def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick:
    """Convert a fixed-length duration into the coarsest exact ``Tick`` offset.

    Adapted from ``pandas.tslib.delta_to_tick``.

    Parameters
    ----------
    delta : datetime.timedelta or pandas.Timedelta
        Duration to convert.

    Returns
    -------
    Tick
        A ``Day``, ``Hour``, ``Minute``, ``Second``, ``Millisecond`` or
        ``Microsecond`` offset whose ``n`` represents the whole of ``delta``.

    Raises
    ------
    ValueError
        If ``delta`` is a ``pandas.Timedelta`` with a non-zero nanosecond
        component.
    """
    if isinstance(delta, pd.Timedelta) and delta.nanoseconds != 0:
        # pandas.Timedelta has nanoseconds, but these are not supported
        raise ValueError(
            "Unable to convert 'pandas.Timedelta' object with non-zero "
            "nanoseconds to 'CFTimeOffset' object"
        )

    # Whole-second part of the duration (days folded into seconds).
    seconds = delta.days * 86400 + delta.seconds

    if delta.microseconds == 0:
        if delta.seconds == 0:
            return Day(n=delta.days)
        if seconds % 3600 == 0:
            return Hour(n=seconds // 3600)
        if seconds % 60 == 0:
            return Minute(n=seconds // 60)
        return Second(n=seconds)

    # ``delta.microseconds`` is only the sub-second component of the
    # normalized (days, seconds, microseconds) representation, so the days
    # and seconds must be included to obtain the full duration. Using the
    # component alone would turn 1.5 s into 500 ms, and -500 us (stored as
    # days=-1, seconds=86399, microseconds=999500) into +999500 us.
    microseconds = seconds * 1_000_000 + delta.microseconds
    if microseconds % 1_000 == 0:
        return Millisecond(n=microseconds // 1_000)
    return Microsecond(n=microseconds)
828+
829+
794830
def to_cftime_datetime(date_str_or_date, calendar=None):
795831
if cftime is None:
796832
raise ModuleNotFoundError("No module named 'cftime'")
@@ -1332,7 +1368,7 @@ def _new_to_legacy_freq(freq):
13321368
return freq
13331369

13341370

1335-
def _legacy_to_new_freq(freq):
1371+
def _legacy_to_new_freq(freq: T_FreqStr) -> T_FreqStr:
13361372
# to avoid internal deprecation warnings when freq is determined using pandas < 2.2
13371373

13381374
# TODO: remove once requiring pandas >= 2.2

xarray/core/common.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import datetime
34
import warnings
45
from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping
56
from contextlib import suppress
@@ -13,6 +14,7 @@
1314
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
1415
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
1516
from xarray.core.options import OPTIONS, _get_keep_attrs
17+
from xarray.core.types import ResampleCompatible
1618
from xarray.core.utils import (
1719
Frozen,
1820
either_dict_or_kwargs,
@@ -32,8 +34,6 @@
3234

3335

3436
if TYPE_CHECKING:
35-
import datetime
36-
3737
from numpy.typing import DTypeLike
3838

3939
from xarray.core.dataarray import DataArray
@@ -891,14 +891,14 @@ def rolling_exp(
891891
def _resample(
892892
self,
893893
resample_cls: type[T_Resample],
894-
indexer: Mapping[Hashable, str | Resampler] | None,
894+
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None,
895895
skipna: bool | None,
896896
closed: SideOptions | None,
897897
label: SideOptions | None,
898898
offset: pd.Timedelta | datetime.timedelta | str | None,
899899
origin: str | DatetimeLike,
900900
restore_coord_dims: bool | None,
901-
**indexer_kwargs: str | Resampler,
901+
**indexer_kwargs: ResampleCompatible | Resampler,
902902
) -> T_Resample:
903903
"""Returns a Resample object for performing resampling operations.
904904
@@ -1078,14 +1078,18 @@ def _resample(
10781078
)
10791079

10801080
grouper: Resampler
1081-
if isinstance(freq, str):
1081+
if isinstance(freq, ResampleCompatible):
10821082
grouper = TimeResampler(
10831083
freq=freq, closed=closed, label=label, origin=origin, offset=offset
10841084
)
10851085
elif isinstance(freq, Resampler):
10861086
grouper = freq
10871087
else:
1088-
raise ValueError("freq must be a str or a Resampler object")
1088+
raise ValueError(
1089+
"freq must be an object of type 'str', 'datetime.timedelta', "
1090+
"'pandas.Timedelta', 'pandas.DateOffset', or 'TimeResampler'. "
1091+
f"Received {type(freq)} instead."
1092+
)
10891093

10901094
rgrouper = ResolvedGrouper(grouper, group, self)
10911095

xarray/core/dataarray.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
QueryEngineOptions,
112112
QueryParserOptions,
113113
ReindexMethodOptions,
114+
ResampleCompatible,
114115
Self,
115116
SideOptions,
116117
T_ChunkDimFreq,
@@ -7269,15 +7270,15 @@ def coarsen(
72697270
@_deprecate_positional_args("v2024.07.0")
72707271
def resample(
72717272
self,
7272-
indexer: Mapping[Hashable, str | Resampler] | None = None,
7273+
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None = None,
72737274
*,
72747275
skipna: bool | None = None,
72757276
closed: SideOptions | None = None,
72767277
label: SideOptions | None = None,
72777278
offset: pd.Timedelta | datetime.timedelta | str | None = None,
72787279
origin: str | DatetimeLike = "start_day",
72797280
restore_coord_dims: bool | None = None,
7280-
**indexer_kwargs: str | Resampler,
7281+
**indexer_kwargs: ResampleCompatible | Resampler,
72817282
) -> DataArrayResample:
72827283
"""Returns a Resample object for performing resampling operations.
72837284
@@ -7288,7 +7289,7 @@ def resample(
72887289
72897290
Parameters
72907291
----------
7291-
indexer : Mapping of Hashable to str, optional
7292+
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
72927293
Mapping from the dimension name to resample frequency [1]_. The
72937294
dimension must be datetime-like.
72947295
skipna : bool, optional
@@ -7312,7 +7313,7 @@ def resample(
73127313
restore_coord_dims : bool, optional
73137314
If True, also restore the dimension order of multi-dimensional
73147315
coordinates.
7315-
**indexer_kwargs : str
7316+
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
73167317
The keyword arguments form of ``indexer``.
73177318
One of indexer or indexer_kwargs must be provided.
73187319

xarray/core/dataset.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
QueryEngineOptions,
164164
QueryParserOptions,
165165
ReindexMethodOptions,
166+
ResampleCompatible,
166167
SideOptions,
167168
T_ChunkDimFreq,
168169
T_DatasetPadConstantValues,
@@ -10710,15 +10711,15 @@ def coarsen(
1071010711
@_deprecate_positional_args("v2024.07.0")
1071110712
def resample(
1071210713
self,
10713-
indexer: Mapping[Any, str | Resampler] | None = None,
10714+
indexer: Mapping[Any, ResampleCompatible | Resampler] | None = None,
1071410715
*,
1071510716
skipna: bool | None = None,
1071610717
closed: SideOptions | None = None,
1071710718
label: SideOptions | None = None,
1071810719
offset: pd.Timedelta | datetime.timedelta | str | None = None,
1071910720
origin: str | DatetimeLike = "start_day",
1072010721
restore_coord_dims: bool | None = None,
10721-
**indexer_kwargs: str | Resampler,
10722+
**indexer_kwargs: ResampleCompatible | Resampler,
1072210723
) -> DatasetResample:
1072310724
"""Returns a Resample object for performing resampling operations.
1072410725
@@ -10729,7 +10730,7 @@ def resample(
1072910730
1073010731
Parameters
1073110732
----------
10732-
indexer : Mapping of Hashable to str, optional
10733+
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
1073310734
Mapping from the dimension name to resample frequency [1]_. The
1073410735
dimension must be datetime-like.
1073510736
skipna : bool, optional
@@ -10753,7 +10754,7 @@ def resample(
1075310754
restore_coord_dims : bool, optional
1075410755
If True, also restore the dimension order of multi-dimensional
1075510756
coordinates.
10756-
**indexer_kwargs : str
10757+
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
1075710758
The keyword arguments form of ``indexer``.
1075810759
One of indexer or indexer_kwargs must be provided.
1075910760

xarray/core/resample_cftime.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
from xarray.core.types import SideOptions
5959

6060
if typing.TYPE_CHECKING:
61-
from xarray.core.types import CFTimeDatetime
61+
from xarray.core.types import CFTimeDatetime, ResampleCompatible
6262

6363

6464
class CFTimeGrouper:
@@ -75,7 +75,7 @@ class CFTimeGrouper:
7575

7676
def __init__(
7777
self,
78-
freq: str | BaseCFTimeOffset,
78+
freq: ResampleCompatible | BaseCFTimeOffset,
7979
closed: SideOptions | None = None,
8080
label: SideOptions | None = None,
8181
origin: str | CFTimeDatetime = "start_day",

xarray/core/types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,5 @@ def copy(
318318
Bins = Union[
319319
int, Sequence[int], Sequence[float], Sequence[pd.Timestamp], np.ndarray, pd.Index
320320
]
321+
322+
ResampleCompatible: TypeAlias = str | datetime.timedelta | pd.Timedelta | pd.DateOffset

xarray/groupers.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,20 @@
1414
import numpy as np
1515
import pandas as pd
1616

17-
from xarray.coding.cftime_offsets import _new_to_legacy_freq
17+
from xarray.coding.cftime_offsets import BaseCFTimeOffset, _new_to_legacy_freq
1818
from xarray.core import duck_array_ops
1919
from xarray.core.coordinates import Coordinates
2020
from xarray.core.dataarray import DataArray
2121
from xarray.core.groupby import T_Group, _DummyGroup
2222
from xarray.core.indexes import safe_cast_to_index
2323
from xarray.core.resample_cftime import CFTimeGrouper
24-
from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
24+
from xarray.core.types import (
25+
Bins,
26+
DatetimeLike,
27+
GroupIndices,
28+
ResampleCompatible,
29+
SideOptions,
30+
)
2531
from xarray.core.variable import Variable
2632

2733
__all__ = [
@@ -336,7 +342,7 @@ class TimeResampler(Resampler):
336342
337343
Attributes
338344
----------
339-
freq : str
345+
freq : str, datetime.timedelta, pandas.Timedelta, or pandas.DateOffset
340346
Frequency to resample to. See `Pandas frequency
341347
aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
342348
for a list of possible values.
@@ -358,7 +364,7 @@ class TimeResampler(Resampler):
358364
An offset timedelta added to the origin.
359365
"""
360366

361-
freq: str
367+
freq: ResampleCompatible
362368
closed: SideOptions | None = field(default=None)
363369
label: SideOptions | None = field(default=None)
364370
origin: str | DatetimeLike = field(default="start_day")
@@ -388,6 +394,12 @@ def _init_properties(self, group: T_Group) -> None:
388394
offset=offset,
389395
)
390396
else:
397+
if isinstance(self.freq, BaseCFTimeOffset):
398+
raise ValueError(
399+
"'BaseCFTimeOffset' resample frequencies are only supported "
400+
"when resampling a 'CFTimeIndex'"
401+
)
402+
391403
self.index_grouper = pd.Grouper(
392404
# TODO remove once requiring pandas >= 2.2
393405
freq=_new_to_legacy_freq(self.freq),

0 commit comments

Comments
 (0)