Skip to content

Commit c897609

Browse files
committed
BUG: loffset not applied when using resample with agg() (GH13218)
1 parent f363236 commit c897609

File tree

3 files changed

+114
-20
lines changed

3 files changed

+114
-20
lines changed

doc/source/whatsnew/v0.19.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1570,3 +1570,4 @@ Bug Fixes
15701570
- Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`)
15711571
- Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`)
15721572
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
1573+
- Bug in ``resample`` where ``loffset`` was not applied when ``resample.agg()`` was called on a timeseries with a non-``str`` argument, such as a ``dict`` or ``list`` (:issue:`13218`)

pandas/tseries/resample.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,11 @@ def aggregate(self, arg, *args, **kwargs):
323323
*args,
324324
**kwargs)
325325

326+
# if arg was a string, _aggregate called resampler's _downsample or
327+
# _groupby_and_aggregate methods, which would've already applied the loffset
328+
if not isinstance(arg, compat.string_types):
329+
result = self._apply_loffset(result)
330+
326331
return result
327332

328333
agg = aggregate
@@ -381,7 +386,7 @@ def _gotitem(self, key, ndim, subset=None):
381386
return grouped
382387

383388
def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
384-
""" revaluate the obj with a groupby aggregation """
389+
""" re-evaluate the obj with a groupby aggregation """
385390

386391
if grouper is None:
387392
self._set_binner()
@@ -409,7 +414,14 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
409414
return self._wrap_result(result)
410415

411416
def _apply_loffset(self, result):
412-
"""if loffset if set, offset the result index"""
417+
"""
418+
if loffset is set, offset the result index
419+
420+
Parameters
421+
----------
422+
result : Series or DataFrame
423+
the result of resample
424+
"""
413425
loffset = self.loffset
414426
if isinstance(loffset, compat.string_types):
415427
loffset = to_offset(self.loffset)
@@ -419,6 +431,7 @@ def _apply_loffset(self, result):
419431
isinstance(result.index, DatetimeIndex) and
420432
len(result.index) > 0
421433
)
434+
422435
if needs_offset:
423436
result.index = result.index + loffset
424437

@@ -797,6 +810,11 @@ def aggregate(self, arg, *args, **kwargs):
797810
if result is None:
798811
result = self._downsample(arg, *args, **kwargs)
799812

813+
# if arg was a string, _aggregate called resampler's _downsample or
814+
# _groupby_and_aggregate methods, which would've already applied the loffset
815+
if not isinstance(arg, compat.string_types):
816+
result = self._apply_loffset(result)
817+
800818
return result
801819

802820
agg = aggregate

pandas/tseries/tests/test_resample.py

Lines changed: 93 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,25 +1168,35 @@ def test_resample_loffset(self):
11681168

11691169
def test_resample_loffset_count(self):
11701170
# GH 12725
1171-
start_time = '1/1/2000 00:00:00'
1172-
rng = date_range(start_time, periods=100, freq='S')
1173-
ts = Series(np.random.randn(len(rng)), index=rng)
1174-
1175-
result = ts.resample('10S', loffset='1s').count()
1176-
1177-
expected_index = (
1178-
date_range(start_time, periods=10, freq='10S') +
1179-
timedelta(seconds=1)
1180-
)
1181-
expected = pd.Series(10, index=expected_index)
1182-
1183-
assert_series_equal(result, expected)
1184-
1185-
# Same issue should apply to .size() since it goes through
1186-
# same code path
1187-
result = ts.resample('10S', loffset='1s').size()
1171+
s = self.create_series()
1172+
df = s.to_frame('value')
1173+
result = df.resample('2D', loffset='2H').count()
1174+
expected_index = DatetimeIndex(start=df.index[0],
1175+
freq='2D',
1176+
periods=len(df.index) / 2)
1177+
expected_index = expected_index + timedelta(hours=2)
1178+
expected = DataFrame({'value': 2},
1179+
index=expected_index)
1180+
assert_frame_equal(result, expected)
11881181

1189-
assert_series_equal(result, expected)
1182+
def test_resample_loffset_agg(self):
1183+
# GH 13218
1184+
s = self.create_series()
1185+
expected_means = [s.values[i:i + 2].mean()
1186+
for i in range(0, len(s.values), 2)]
1187+
df = s.to_frame('value')
1188+
for arg in ['mean', {'value': 'mean'}, ['mean']]:
1189+
result = df.resample('2D', loffset='2H').agg(arg)
1190+
expected_index = DatetimeIndex(start=df.index[0],
1191+
freq='2D',
1192+
periods=len(df.index) / 2)
1193+
expected_index = expected_index + timedelta(hours=2)
1194+
expected = DataFrame({'value': expected_means},
1195+
index=expected_index)
1196+
if isinstance(arg, list):
1197+
expected.columns = pd.MultiIndex.from_tuples([('value',
1198+
'mean')])
1199+
assert_frame_equal(result, expected)
11901200

11911201
def test_resample_upsample(self):
11921202
# from daily
@@ -2629,6 +2639,36 @@ def test_evenly_divisible_with_no_extra_bins(self):
26292639
result = df.resample('7D').sum()
26302640
assert_frame_equal(result, expected)
26312641

2642+
def test_resample_loffset_count(self):
2643+
# GH 12725
2644+
s = self.create_series()
2645+
df = s.to_frame('value')
2646+
result = df.resample('2D', loffset='2H').count()
2647+
expected_index = df.index.take(
2648+
np.arange(0, len(df.index), 2)).to_datetime()
2649+
expected_index = expected_index + timedelta(hours=2)
2650+
expected = DataFrame({'value': 2},
2651+
index=expected_index)
2652+
assert_frame_equal(result, expected)
2653+
2654+
def test_resample_loffset_agg(self):
2655+
# GH 13218
2656+
s = self.create_series()
2657+
expected_means = [s.values[i:i + 2].mean()
2658+
for i in range(0, len(s.values), 2)]
2659+
df = s.to_frame('value')
2660+
for arg in ['mean', {'value': 'mean'}, ['mean']]:
2661+
result = df.resample('2D', loffset='2H').agg(arg)
2662+
expected_index = df.index.take(
2663+
np.arange(0, len(df.index), 2)).to_datetime()
2664+
expected_index = expected_index + timedelta(hours=2)
2665+
expected = DataFrame({'value': expected_means},
2666+
index=expected_index)
2667+
if isinstance(arg, list):
2668+
expected.columns = pd.MultiIndex.from_tuples([('value',
2669+
'mean')])
2670+
assert_frame_equal(result, expected)
2671+
26322672

26332673
class TestTimedeltaIndex(Base, tm.TestCase):
26342674
_multiprocess_can_split_ = True
@@ -2651,6 +2691,41 @@ def test_asfreq_bug(self):
26512691
freq='1T'))
26522692
assert_frame_equal(result, expected)
26532693

2694+
def test_resample_loffset_count(self):
2695+
# GH 12725
2696+
s = self.create_series()
2697+
df = s.to_frame('value')
2698+
result = df.resample('2D', loffset='2H').count()
2699+
2700+
# GH 13022, 7687 resample w/ TimedeltaIndex results in incorrect index
2701+
expected_index = timedelta_range(start=df.index[0],
2702+
freq='2D',
2703+
periods=len(df.index) / 2)
2704+
expected = DataFrame({'value': 2},
2705+
index=expected_index)
2706+
with tm.assertRaises(AssertionError):
2707+
assert_frame_equal(result, expected)
2708+
2709+
def test_resample_loffset_agg(self):
2710+
# GH 13218
2711+
s = self.create_series()
2712+
expected_means = [s.values[i:i + 2].mean()
2713+
for i in range(0, len(s.values), 2)]
2714+
df = s.to_frame('value')
2715+
for arg in ['mean', {'value': 'mean'}, ['mean']]:
2716+
result = df.resample('2D', loffset='2H').agg(arg)
2717+
expected_index = timedelta_range(start=df.index[0],
2718+
freq='2D',
2719+
periods=len(df.index) / 2)
2720+
expected = DataFrame({'value': expected_means},
2721+
index=expected_index)
2722+
if isinstance(arg, list):
2723+
expected.columns = pd.MultiIndex.from_tuples([('value',
2724+
'mean')])
2725+
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
2726+
with tm.assertRaises(AssertionError):
2727+
assert_frame_equal(result, expected)
2728+
26542729

26552730
class TestResamplerGrouper(tm.TestCase):
26562731
def setUp(self):

0 commit comments

Comments
 (0)