Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ Styler

Other
^^^^^
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` incorrectly allowing non-fixed ``freq`` when resampling on a :class:`TimedeltaIndex` (:issue:`51896`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should probably go in Groupby/resample/rolling

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved+green

-

.. ***DO NOT USE THIS SECTION***

Expand Down
10 changes: 7 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1848,6 +1848,8 @@ def __init__(
values = values.copy()
if freq:
freq = to_offset(freq)
if values.dtype.kind == "m" and not isinstance(freq, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")

NDArrayBacked.__init__(self, values=values, dtype=dtype)
self._freq = freq
Expand All @@ -1871,6 +1873,8 @@ def freq(self, value) -> None:
if value is not None:
value = to_offset(value)
self._validate_frequency(self, value)
if self.dtype.kind == "m" and not isinstance(value, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")

if self.ndim > 1:
raise ValueError("Cannot set freq with ndim > 1")
Expand Down Expand Up @@ -2064,9 +2068,9 @@ def _with_freq(self, freq):
# Always valid
pass
elif len(self) == 0 and isinstance(freq, BaseOffset):
# Always valid. In the TimedeltaArray case, we assume this
# is a Tick offset.
pass
# Valid for any offset, except in the TimedeltaArray case, where we require a Tick offset
if self.dtype.kind == "m" and not isinstance(freq, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")
else:
# As an internal method, we can ensure this assertion always holds
assert freq == "infer"
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def _simple_new( # type: ignore[override]
assert not tslibs.is_unitless(dtype)
assert isinstance(values, np.ndarray), type(values)
assert dtype == values.dtype
assert freq is None or isinstance(freq, Tick)

result = super()._simple_new(values=values, dtype=dtype)
result._freq = freq
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1826,6 +1826,13 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex):
f"an instance of {type(ax).__name__}"
)

if not isinstance(self.freq, Tick):
# GH#51896
raise ValueError(
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
f"e.g. '24H' or '3D', not {self.freq}"
)

if not len(ax):
binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name)
return binner, [], labels
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/timedeltas/test_freq_attr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DateOffset,
Day,
Hour,
MonthEnd,
)


Expand All @@ -25,6 +26,16 @@ def test_freq_setter(self, values, freq):
idx._data.freq = None
assert idx.freq is None

def test_with_freq_empty_requires_tick(self):
    """Check that ``_with_freq`` rejects a non-Tick offset on empty timedelta data.

    Builds an empty ``TimedeltaIndex`` and passes ``MonthEnd(1)`` to
    ``_with_freq`` on both the index and its ``_data`` array, expecting a
    ``TypeError`` with the "must be a Tick" message in each case.
    """
    empty_index = TimedeltaIndex([])
    non_tick = MonthEnd(1)
    expected_msg = "TimedeltaArray/Index freq must be a Tick"

    # Exercise the Index first, then the array that backs it.
    for target in (empty_index, empty_index._data):
        with pytest.raises(TypeError, match=expected_msg):
            target._with_freq(non_tick)

def test_freq_setter_errors(self):
# GH#20678
idx = TimedeltaIndex(["0 days", "2 days", "4 days"])
Expand Down
71 changes: 66 additions & 5 deletions pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
NaT,
PeriodIndex,
Series,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
Expand Down Expand Up @@ -110,7 +111,17 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method, request)
)

ser = empty_series_dti
result = getattr(ser.resample(freq), resample_method)()
if freq == "M" and isinstance(ser.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24H' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
ser.resample(freq)
return

rs = ser.resample(freq)
result = getattr(rs, resample_method)()

expected = ser.copy()
expected.index = _asfreq_compat(ser.index, freq)
Expand Down Expand Up @@ -150,11 +161,23 @@ def test_resample_nat_index_series(request, freq, series, resample_method):
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
# GH28427
result = getattr(empty_series_dti.resample(freq), resample_method)()
ser = empty_series_dti
if freq == "M" and isinstance(ser.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24H' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
ser.resample(freq)
return

rs = ser.resample(freq)

result = getattr(rs, resample_method)()

index = _asfreq_compat(empty_series_dti.index, freq)
index = _asfreq_compat(ser.index, freq)

expected = Series([], dtype="int64", index=index, name=empty_series_dti.name)
expected = Series([], dtype="int64", index=index, name=ser.name)

tm.assert_series_equal(result, expected)

Expand All @@ -165,7 +188,17 @@ def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
# GH13212
df = empty_frame_dti
# count retains dimensions too
result = getattr(df.resample(freq, group_keys=False), resample_method)()
if freq == "M" and isinstance(df.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24H' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
df.resample(freq, group_keys=False)
return

rs = df.resample(freq, group_keys=False)
result = getattr(rs, resample_method)()
if resample_method != "size":
expected = df.copy()
else:
Expand All @@ -188,6 +221,15 @@ def test_resample_count_empty_dataframe(freq, empty_frame_dti):

empty_frame_dti["a"] = []

if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24H' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
empty_frame_dti.resample(freq)
return

result = empty_frame_dti.resample(freq).count()

index = _asfreq_compat(empty_frame_dti.index, freq)
Expand All @@ -204,6 +246,15 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti):

empty_frame_dti["a"] = []

if freq == "M" and isinstance(empty_frame_dti.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24H' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
empty_frame_dti.resample(freq)
return

result = empty_frame_dti.resample(freq).size()

index = _asfreq_compat(empty_frame_dti.index, freq)
Expand Down Expand Up @@ -233,6 +284,16 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
def test_apply_to_empty_series(empty_series_dti, freq):
# GH 14313
ser = empty_series_dti

if freq == "M" and isinstance(empty_series_dti.index, TimedeltaIndex):
msg = (
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
"e.g. '24H' or '3D', not <MonthEnd>"
)
with pytest.raises(ValueError, match=msg):
empty_series_dti.resample(freq)
return

result = ser.resample(freq, group_keys=False).apply(lambda x: 1)
expected = ser.resample(freq).apply(np.sum)

Expand Down