BUG: loffset not applied when using resample with agg() (GH13218)

wcwagner · wcwagner · commit c897609f3003 · 2016-09-13T12:19:00.000-04:00
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -1570,3 +1570,4 @@ Bug Fixes
 - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`)
 - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`)
 - ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
+- Bug in ``resample`` where ``loffset`` was not applied when calling ``resample.agg()`` with a non-``str`` argument (e.g. a ``list`` or ``dict``) on a timeseries (:issue:`13218`)
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
@@ -323,6 +323,11 @@ def aggregate(self, arg, *args, **kwargs):
                                                *args,
                                                **kwargs)
 
+        # if arg was a string, _aggregate called the resampler's _downsample
+        # or _groupby_and_aggregate method, which already applied the loffset
+        if not isinstance(arg, compat.string_types):
+            result = self._apply_loffset(result)
+
         return result
 
     agg = aggregate
@@ -381,7 +386,7 @@ def _gotitem(self, key, ndim, subset=None):
             return grouped
 
     def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
-        """ revaluate the obj with a groupby aggregation """
+        """ re-evaluate the obj with a groupby aggregation """
 
         if grouper is None:
             self._set_binner()
@@ -409,7 +414,14 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
         return self._wrap_result(result)
 
     def _apply_loffset(self, result):
-        """if loffset if set, offset the result index"""
+        """
+        if loffset is set, offset the result index
+
+        Parameters
+        ----------
+        result : Series or DataFrame
+            the result of resample
+        """
         loffset = self.loffset
         if isinstance(loffset, compat.string_types):
             loffset = to_offset(self.loffset)
@@ -419,6 +431,7 @@ def _apply_loffset(self, result):
             isinstance(result.index, DatetimeIndex) and
             len(result.index) > 0
         )
+
         if needs_offset:
             result.index = result.index + loffset
 
@@ -797,6 +810,11 @@ def aggregate(self, arg, *args, **kwargs):
         if result is None:
             result = self._downsample(arg, *args, **kwargs)
 
+        # if arg was a string, _aggregate called the resampler's _downsample
+        # or _groupby_and_aggregate method, which already applied the loffset
+        if not isinstance(arg, compat.string_types):
+            result = self._apply_loffset(result)
+
         return result
 
     agg = aggregate
diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
@@ -1168,25 +1168,35 @@ def test_resample_loffset(self):
 
     def test_resample_loffset_count(self):
         # GH 12725
-        start_time = '1/1/2000 00:00:00'
-        rng = date_range(start_time, periods=100, freq='S')
-        ts = Series(np.random.randn(len(rng)), index=rng)
-
-        result = ts.resample('10S', loffset='1s').count()
-
-        expected_index = (
-            date_range(start_time, periods=10, freq='10S') +
-            timedelta(seconds=1)
-        )
-        expected = pd.Series(10, index=expected_index)
-
-        assert_series_equal(result, expected)
-
-        # Same issue should apply to .size() since it goes through
-        #   same code path
-        result = ts.resample('10S', loffset='1s').size()
+        s = self.create_series()
+        df = s.to_frame('value')
+        result = df.resample('2D', loffset='2H').count()
+        expected_index = DatetimeIndex(start=df.index[0],
+                                       freq='2D',
+                                       periods=len(df.index) / 2)
+        expected_index = expected_index + timedelta(hours=2)
+        expected = DataFrame({'value': 2},
+                             index=expected_index)
+        assert_frame_equal(result, expected)
 
-        assert_series_equal(result, expected)
+    def test_resample_loffset_agg(self):
+        # GH 13218
+        s = self.create_series()
+        expected_means = [s.values[i:i + 2].mean()
+                          for i in range(0, len(s.values), 2)]
+        df = s.to_frame('value')
+        for arg in ['mean', {'value': 'mean'}, ['mean']]:
+            result = df.resample('2D', loffset='2H').agg(arg)
+            expected_index = DatetimeIndex(start=df.index[0],
+                                           freq='2D',
+                                           periods=len(df.index) / 2)
+            expected_index = expected_index + timedelta(hours=2)
+            expected = DataFrame({'value': expected_means},
+                                 index=expected_index)
+            if isinstance(arg, list):
+                expected.columns = pd.MultiIndex.from_tuples([('value',
+                                                               'mean')])
+            assert_frame_equal(result, expected)
 
     def test_resample_upsample(self):
         # from daily
@@ -2629,6 +2639,36 @@ def test_evenly_divisible_with_no_extra_bins(self):
         result = df.resample('7D').sum()
         assert_frame_equal(result, expected)
 
+    def test_resample_loffset_count(self):
+        # GH 12725
+        s = self.create_series()
+        df = s.to_frame('value')
+        result = df.resample('2D', loffset='2H').count()
+        expected_index = df.index.take(
+            np.arange(0, len(df.index), 2)).to_datetime()
+        expected_index = expected_index + timedelta(hours=2)
+        expected = DataFrame({'value': 2},
+                             index=expected_index)
+        assert_frame_equal(result, expected)
+
+    def test_resample_loffset_agg(self):
+        # GH 13218
+        s = self.create_series()
+        expected_means = [s.values[i:i + 2].mean()
+                          for i in range(0, len(s.values), 2)]
+        df = s.to_frame('value')
+        for arg in ['mean', {'value': 'mean'}, ['mean']]:
+            result = df.resample('2D', loffset='2H').agg(arg)
+            expected_index = df.index.take(
+                np.arange(0, len(df.index), 2)).to_datetime()
+            expected_index = expected_index + timedelta(hours=2)
+            expected = DataFrame({'value': expected_means},
+                                 index=expected_index)
+            if isinstance(arg, list):
+                expected.columns = pd.MultiIndex.from_tuples([('value',
+                                                               'mean')])
+            assert_frame_equal(result, expected)
+
 
 class TestTimedeltaIndex(Base, tm.TestCase):
     _multiprocess_can_split_ = True
@@ -2651,6 +2691,41 @@ def test_asfreq_bug(self):
                                                    freq='1T'))
         assert_frame_equal(result, expected)
 
+    def test_resample_loffset_count(self):
+        # GH 12725
+        s = self.create_series()
+        df = s.to_frame('value')
+        result = df.resample('2D', loffset='2H').count()
+
+        # GH 13022, 7687 resample w/ TimedeltaIndex results in incorrect index
+        expected_index = timedelta_range(start=df.index[0],
+                                         freq='2D',
+                                         periods=len(df.index) / 2)
+        expected = DataFrame({'value': 2},
+                             index=expected_index)
+        with tm.assertRaises(AssertionError):
+            assert_frame_equal(result, expected)
+
+    def test_resample_loffset_agg(self):
+        # GH 13218
+        s = self.create_series()
+        expected_means = [s.values[i:i + 2].mean()
+                          for i in range(0, len(s.values), 2)]
+        df = s.to_frame('value')
+        for arg in ['mean', {'value': 'mean'}, ['mean']]:
+            result = df.resample('2D', loffset='2H').agg(arg)
+            expected_index = timedelta_range(start=df.index[0],
+                                             freq='2D',
+                                             periods=len(df.index) / 2)
+            expected = DataFrame({'value': expected_means},
+                                 index=expected_index)
+            if isinstance(arg, list):
+                expected.columns = pd.MultiIndex.from_tuples([('value',
+                                                               'mean')])
+            # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
+            with tm.assertRaises(AssertionError):
+                assert_frame_equal(result, expected)
+
 
 class TestResamplerGrouper(tm.TestCase):
     def setUp(self):