Skip to content

Commit 03d9c61

Browse files
committed
Refactor resampling.
Toward pydata#8510: (1) Rename to Resampler from ResampleGrouper; (2) Move code from common.resample to TimeResampler.
1 parent 41d33f5 commit 03d9c61

File tree

2 files changed

+84
-64
lines changed

2 files changed

+84
-64
lines changed

xarray/core/common.py

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from xarray.core.utils import (
1919
Frozen,
2020
either_dict_or_kwargs,
21-
emit_user_level_warning,
2221
is_scalar,
2322
)
2423
from xarray.namedarray.core import _raise_if_any_duplicate_dimensions
@@ -984,8 +983,7 @@ def _resample(
984983
# TODO support non-string indexer after removing the old API.
985984

986985
from xarray.core.dataarray import DataArray
987-
from xarray.core.groupby import ResolvedTimeResampleGrouper, TimeResampleGrouper
988-
from xarray.core.pdcompat import _convert_base_to_offset
986+
from xarray.core.groupby import ResolvedTimeResampler, TimeResampler
989987
from xarray.core.resample import RESAMPLE_DIM
990988

991989
# note: the second argument (now 'skipna') used to be 'dim'
@@ -1008,44 +1006,24 @@ def _resample(
10081006
dim_name: Hashable = dim
10091007
dim_coord = self[dim]
10101008

1011-
if loffset is not None:
1012-
emit_user_level_warning(
1013-
"Following pandas, the `loffset` parameter to resample is deprecated. "
1014-
"Switch to updating the resampled dataset time coordinate using "
1015-
"time offset arithmetic. For example:\n"
1016-
" >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n"
1017-
' >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset',
1018-
FutureWarning,
1019-
)
1020-
1021-
if base is not None:
1022-
emit_user_level_warning(
1023-
"Following pandas, the `base` parameter to resample will be deprecated in "
1024-
"a future version of xarray. Switch to using `origin` or `offset` instead.",
1025-
FutureWarning,
1026-
)
1027-
1028-
if base is not None and offset is not None:
1029-
raise ValueError("base and offset cannot be present at the same time")
1030-
1031-
if base is not None:
1032-
index = self._indexes[dim_name].to_pandas_index()
1033-
offset = _convert_base_to_offset(base, freq, index)
1009+
group = DataArray(
1010+
dim_coord,
1011+
coords=dim_coord.coords,
1012+
dims=dim_coord.dims,
1013+
name=RESAMPLE_DIM,
1014+
)
10341015

1035-
grouper = TimeResampleGrouper(
1016+
grouper = TimeResampler(
10361017
freq=freq,
10371018
closed=closed,
10381019
label=label,
10391020
origin=origin,
10401021
offset=offset,
10411022
loffset=loffset,
1023+
base=base,
10421024
)
10431025

1044-
group = DataArray(
1045-
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
1046-
)
1047-
1048-
rgrouper = ResolvedTimeResampleGrouper(grouper, group, self)
1026+
rgrouper = ResolvedTimeResampler(grouper, group, self)
10491027

10501028
return resample_cls(
10511029
self,

xarray/core/groupby.py

Lines changed: 74 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from xarray.core.utils import (
3939
FrozenMappingWarningOnValuesAccess,
4040
either_dict_or_kwargs,
41+
emit_user_level_warning,
4142
hashable,
4243
is_scalar,
4344
maybe_wrap_array,
@@ -482,43 +483,66 @@ def _factorize(self, squeeze: bool) -> T_FactorizeOut:
482483

483484

484485
@dataclass
485-
class ResolvedTimeResampleGrouper(ResolvedGrouper):
486-
grouper: TimeResampleGrouper
486+
class ResolvedTimeResampler(ResolvedGrouper):
487+
grouper: TimeResampler
487488
index_grouper: CFTimeGrouper | pd.Grouper = field(init=False)
489+
group_as_index: pd.Index = field(init=False)
490+
491+
def __post_init__(self):
492+
if self.loffset is not None:
493+
emit_user_level_warning(
494+
"Following pandas, the `loffset` parameter to resample will be deprecated "
495+
"in a future version of xarray. Switch to using time offset arithmetic.",
496+
FutureWarning,
497+
)
488498

489-
def __post_init__(self) -> None:
490-
super().__post_init__()
499+
if self.base is not None:
500+
emit_user_level_warning(
501+
"Following pandas, the `base` parameter to resample will be deprecated in "
502+
"a future version of xarray. Switch to using `origin` or `offset` instead.",
503+
FutureWarning,
504+
)
505+
506+
if self.base is not None and self.offset is not None:
507+
raise ValueError("base and offset cannot be present at the same time")
491508

509+
def _init_properties(self, group):
492510
from xarray import CFTimeIndex
511+
from xarray.core.pdcompat import _convert_base_to_offset
493512

494-
group_as_index = safe_cast_to_index(self.group)
495-
self._group_as_index = group_as_index
513+
group_as_index = safe_cast_to_index(group)
514+
515+
if self.base is not None:
516+
# the grouper constructor raises if both base and offset are given, so offset is None at this point
517+
offset = _convert_base_to_offset(self.base, self.freq, group_as_index)
518+
else:
519+
offset = self.offset
496520

497521
if not group_as_index.is_monotonic_increasing:
498522
# TODO: sort instead of raising an error
499523
raise ValueError("index must be monotonic for resampling")
500524

501-
grouper = self.grouper
502525
if isinstance(group_as_index, CFTimeIndex):
503526
from xarray.core.resample_cftime import CFTimeGrouper
504527

505528
index_grouper = CFTimeGrouper(
506-
freq=grouper.freq,
507-
closed=grouper.closed,
508-
label=grouper.label,
509-
origin=grouper.origin,
510-
offset=grouper.offset,
511-
loffset=grouper.loffset,
529+
freq=self.freq,
530+
closed=self.closed,
531+
label=self.label,
532+
origin=self.origin,
533+
offset=offset,
534+
loffset=self.loffset,
512535
)
513536
else:
514537
index_grouper = pd.Grouper(
515-
freq=grouper.freq,
516-
closed=grouper.closed,
517-
label=grouper.label,
518-
origin=grouper.origin,
519-
offset=grouper.offset,
538+
freq=self.freq,
539+
closed=self.closed,
540+
label=self.label,
541+
origin=self.origin,
542+
offset=offset,
520543
)
521544
self.index_grouper = index_grouper
545+
self.group_as_index = group_as_index
522546

523547
def _get_index_and_items(self) -> tuple[pd.Index, pd.Series, np.ndarray]:
524548
first_items, codes = self.first_items()
@@ -543,22 +567,21 @@ def first_items(self) -> tuple[pd.Series, np.ndarray]:
543567
# So for _flox_reduce we avoid one reindex and copy by avoiding
544568
# _maybe_restore_empty_groups
545569
codes = np.repeat(np.arange(len(first_items)), counts)
546-
if self.grouper.loffset is not None:
547-
_apply_loffset(self.grouper.loffset, first_items)
570+
if self.loffset is not None:
571+
_apply_loffset(self.loffset, first_items)
548572
return first_items, codes
549573

550-
def _factorize(self, squeeze: bool) -> T_FactorizeOut:
574+
def _factorize(self, group) -> T_FactorizeOut:
575+
self._init_properties(group)
551576
full_index, first_items, codes_ = self._get_index_and_items()
552577
sbins = first_items.values.astype(np.int64)
553578
group_indices: T_GroupIndices = [
554579
slice(i, j) for i, j in zip(sbins[:-1], sbins[1:])
555580
]
556581
group_indices += [slice(sbins[-1], None)]
557582

558-
unique_coord = IndexVariable(
559-
self.group.name, first_items.index, self.group.attrs
560-
)
561-
codes = self.group.copy(data=codes_)
583+
unique_coord = IndexVariable(group.name, first_items.index, group.attrs)
584+
codes = group.copy(data=codes_)
562585

563586
return codes, group_indices, unique_coord, full_index
564587

@@ -583,13 +606,32 @@ def __post_init__(self) -> None:
583606

584607

585608
@dataclass
586-
class TimeResampleGrouper(Grouper):
609+
class TimeResampler(Grouper):
587610
freq: str
588-
closed: SideOptions | None
589-
label: SideOptions | None
590-
origin: str | DatetimeLike | None
591-
offset: pd.Timedelta | datetime.timedelta | str | None
592-
loffset: datetime.timedelta | str | None
611+
closed: SideOptions | None = field(default=None)
612+
label: SideOptions | None = field(default=None)
613+
origin: str | DatetimeLike = field(default="start_day")
614+
offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
615+
loffset: datetime.timedelta | str | None = field(default=None)
616+
base: str | None = field(default=None)
617+
618+
def __post_init__(self):
619+
if self.loffset is not None:
620+
emit_user_level_warning(
621+
"Following pandas, the `loffset` parameter to resample will be deprecated "
622+
"in a future version of xarray. Switch to using time offset arithmetic.",
623+
FutureWarning,
624+
)
625+
626+
if self.base is not None:
627+
emit_user_level_warning(
628+
"Following pandas, the `base` parameter to resample will be deprecated in "
629+
"a future version of xarray. Switch to using `origin` or `offset` instead.",
630+
FutureWarning,
631+
)
632+
633+
if self.base is not None and self.offset is not None:
634+
raise ValueError("base and offset cannot be present at the same time")
593635

594636

595637
def _validate_groupby_squeeze(squeeze: bool) -> None:
@@ -936,7 +978,7 @@ def _maybe_restore_empty_groups(self, combined):
936978
"""
937979
(grouper,) = self.groupers
938980
if (
939-
isinstance(grouper, (ResolvedBinGrouper, ResolvedTimeResampleGrouper))
981+
isinstance(grouper, (ResolvedBinGrouper, ResolvedTimeResampler))
940982
and grouper.name in combined.dims
941983
):
942984
indexers = {grouper.name: grouper.full_index}

0 commit comments

Comments
 (0)