Skip to content

BUG: loffset not applied when using resample with agg() (GH13218) #13861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
@@ -975,3 +975,4 @@ Bug Fixes
- Bug in ``Index`` where the ``KeyError`` raised displayed an incorrect column when the column is not in the df and the columns contain duplicate values (:issue:`13822`)
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
- Bug in ``pd.to_datetime()`` where floats were not cast correctly when ``unit`` was specified, resulting in truncated datetime (:issue:`13845`)
- Bug in ``resample`` where ``loffset`` was not applied when calling ``resample.agg()`` on a timeseries (:issue:`13218`)
19 changes: 17 additions & 2 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
@@ -309,6 +309,9 @@ def aggregate(self, arg, *args, **kwargs):
return self._groupby_and_aggregate(arg,
*args,
**kwargs)
# GH 13218
if isinstance(arg, (dict, list)):
result = self._apply_loffset(result)

return result

@@ -368,7 +371,7 @@ def _gotitem(self, key, ndim, subset=None):
return grouped

def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
""" revaluate the obj with a groupby aggregation """
""" re-evaluate the obj with a groupby aggregation """

if grouper is None:
self._set_binner()
@@ -396,7 +399,14 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
return self._wrap_result(result)

def _apply_loffset(self, result):
"""if loffset if set, offset the result index"""
"""
if loffset is set, offset the result index

Parameters
----------
result : Series or DataFrame
the result of resample
"""
loffset = self.loffset
if isinstance(loffset, compat.string_types):
loffset = to_offset(self.loffset)
@@ -406,6 +416,7 @@ def _apply_loffset(self, result):
isinstance(result.index, DatetimeIndex) and
len(result.index) > 0
)

if needs_offset:
result.index = result.index + loffset

@@ -771,6 +782,10 @@ def aggregate(self, arg, *args, **kwargs):
if result is None:
result = self._downsample(arg, *args, **kwargs)

# GH 13218
if isinstance(arg, (dict, list)):
result = self._apply_loffset(result)

return result

agg = aggregate
111 changes: 93 additions & 18 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
@@ -1098,25 +1098,35 @@ def test_resample_loffset(self):

def test_resample_loffset_count(self):
# GH 12725
start_time = '1/1/2000 00:00:00'
rng = date_range(start_time, periods=100, freq='S')
ts = Series(np.random.randn(len(rng)), index=rng)

result = ts.resample('10S', loffset='1s').count()

expected_index = (
date_range(start_time, periods=10, freq='10S') +
timedelta(seconds=1)
)
expected = pd.Series(10, index=expected_index)

assert_series_equal(result, expected)

# Same issue should apply to .size() since it goes through
# same code path
result = ts.resample('10S', loffset='1s').size()
s = self.create_series()
df = s.to_frame('value')
result = df.resample('2D', loffset='2H').count()
expected_index = DatetimeIndex(start=df.index[0],
freq='2D',
periods=len(df.index) / 2)
expected_index = expected_index + timedelta(hours=2)
expected = DataFrame({'value': 2},
index=expected_index)
assert_frame_equal(result, expected)

assert_series_equal(result, expected)
def test_resample_loffset_agg(self):
# GH 13218
# Checks that the result index of resample(..., loffset='2H').agg(arg)
# is shifted by the offset for each of the three `arg` forms looped over
# below: a string, a dict and a list.
s = self.create_series()
# Expected values: the mean of each consecutive pair of input values.
# Presumably create_series() is daily-spaced so that every '2D' bin holds
# exactly two points -- TODO confirm against the fixture.
expected_means = [s.values[i:i + 2].mean()
for i in range(0, len(s.values), 2)]
df = s.to_frame('value')
for arg in ['mean', {'value': 'mean'}, ['mean']]:
result = df.resample('2D', loffset='2H').agg(arg)
# NOTE(review): `/` is true division on Python 3, so `periods` is a
# float there; confirm the index constructor accepts that or use `//`.
expected_index = DatetimeIndex(start=df.index[0],
freq='2D',
periods=len(df.index) / 2)
# Shift the expected labels by the same 2 hours passed as `loffset`.
expected_index = expected_index + timedelta(hours=2)
expected = DataFrame({'value': expected_means},
index=expected_index)
# For a list `arg` the expected columns are replaced by a MultiIndex
# holding the single tuple ('value', 'mean').
if isinstance(arg, list):
expected.columns = pd.MultiIndex.from_tuples([('value',
'mean')])
assert_frame_equal(result, expected)

def test_resample_upsample(self):
# from daily
@@ -2509,6 +2519,36 @@ def test_evenly_divisible_with_no_extra_bins(self):
result = df.resample('7D').sum()
assert_frame_equal(result, expected)

def test_resample_loffset_count(self):
# GH 12725
# Checks that `loffset` is applied to the index returned by count() on a
# resampled single-column frame.
s = self.create_series()
df = s.to_frame('value')
result = df.resample('2D', loffset='2H').count()
# Expected labels: every second entry of the original index, converted
# with to_datetime() and then shifted by the 2-hour offset.
expected_index = df.index.take(
np.arange(0, len(df.index), 2)).to_datetime()
expected_index = expected_index + timedelta(hours=2)
# Every bin is expected to report a count of 2.
expected = DataFrame({'value': 2},
index=expected_index)
assert_frame_equal(result, expected)

def test_resample_loffset_agg(self):
# GH 13218
# Checks that the result index of resample(..., loffset='2H').agg(arg)
# is shifted by the offset for each of the three `arg` forms looped over
# below: a string, a dict and a list.
s = self.create_series()
# Expected values: the mean of each consecutive pair of input values.
# Presumably create_series() is daily-spaced so that every '2D' bin holds
# exactly two points -- TODO confirm against the fixture.
expected_means = [s.values[i:i + 2].mean()
for i in range(0, len(s.values), 2)]
df = s.to_frame('value')
for arg in ['mean', {'value': 'mean'}, ['mean']]:
result = df.resample('2D', loffset='2H').agg(arg)
# Expected labels: every second entry of the original index, converted
# with to_datetime() and then shifted by the 2-hour offset.
expected_index = df.index.take(
np.arange(0, len(df.index), 2)).to_datetime()
expected_index = expected_index + timedelta(hours=2)
expected = DataFrame({'value': expected_means},
index=expected_index)
# For a list `arg` the expected columns are replaced by a MultiIndex
# holding the single tuple ('value', 'mean').
if isinstance(arg, list):
expected.columns = pd.MultiIndex.from_tuples([('value',
'mean')])
assert_frame_equal(result, expected)


class TestTimedeltaIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
@@ -2531,6 +2571,41 @@ def test_asfreq_bug(self):
freq='1T'))
assert_frame_equal(result, expected)

def test_resample_loffset_count(self):
# GH 12725
# Resamples a single-column frame with loffset='2H' and counts each bin.
# The comparison at the end is asserted to FAIL (see the GH references
# below), so this documents a known problem rather than pinning a result.
s = self.create_series()
df = s.to_frame('value')
result = df.resample('2D', loffset='2H').count()

# GH 13022, 7687 resample w/ TimedeltaIndex results in incorrect index
# NOTE(review): `/` is true division on Python 3, so `periods` is a float
# there; confirm timedelta_range accepts that or use `//`.
# Unlike the shifted expectations elsewhere, no offset is added here.
expected_index = timedelta_range(start=df.index[0],
freq='2D',
periods=len(df.index) / 2)
expected = DataFrame({'value': 2},
index=expected_index)
# NOTE(review): any mismatch between `result` and `expected` satisfies
# this block, so it does not verify what index is actually produced.
with tm.assertRaises(AssertionError):
assert_frame_equal(result, expected)

def test_resample_loffset_agg(self):
# GH 13218
# Runs resample(..., loffset='2H').agg(arg) for a string, a dict and a
# list `arg`. The final comparison is asserted to FAIL (see the TODO
# below), so this records a known problem rather than pinning a result.
s = self.create_series()
# Expected values: the mean of each consecutive pair of input values.
# Presumably create_series() is daily-spaced so that every '2D' bin holds
# exactly two points -- TODO confirm against the fixture.
expected_means = [s.values[i:i + 2].mean()
for i in range(0, len(s.values), 2)]
df = s.to_frame('value')
for arg in ['mean', {'value': 'mean'}, ['mean']]:
result = df.resample('2D', loffset='2H').agg(arg)
# NOTE(review): `/` is true division on Python 3, so `periods` is a
# float there; confirm timedelta_range accepts that or use `//`.
# No offset is added to the expected index here.
expected_index = timedelta_range(start=df.index[0],
freq='2D',
periods=len(df.index) / 2)
expected = DataFrame({'value': expected_means},
index=expected_index)
# For a list `arg` the expected columns are replaced by a MultiIndex
# holding the single tuple ('value', 'mean').
if isinstance(arg, list):
expected.columns = pd.MultiIndex.from_tuples([('value',
'mean')])
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
# NOTE(review): any mismatch between `result` and `expected` satisfies
# this block, so it does not verify what index is actually produced.
with tm.assertRaises(AssertionError):
assert_frame_equal(result, expected)


class TestResamplerGrouper(tm.TestCase):
def setUp(self):