From 4c7e66dd91dbd2b5f7298911b777f1757d62d839 Mon Sep 17 00:00:00 2001 From: Matt Roeschke <mroeschke@housecanary.com> Date: Sun, 10 Feb 2019 22:59:26 -0800 Subject: [PATCH 1/5] BUG: groupby.transform retains timezone information --- pandas/core/groupby/generic.py | 2 +- pandas/tests/groupby/test_transform.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 27e13e86a6e9e..52056a6842ed9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -964,7 +964,7 @@ def _transform_fast(self, func, func_nm): ids, _, ngroup = self.grouper.group_info cast = self._transform_should_cast(func_nm) - out = algorithms.take_1d(func().values, ids) + out = algorithms.take_1d(func()._values, ids) if cast: out = self._try_cast(out, self.obj) return Series(out, index=self.obj.index, name=self.obj.name) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index f120402e6e8ca..52aa0e6da909d 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -834,3 +834,13 @@ def demean_rename(x): tm.assert_frame_equal(result, expected) result_single = df.groupby('group').value.transform(demean_rename) tm.assert_series_equal(result_single, expected['value']) + + +def test_groupby_transform_timezone_column(): + # GH 24198 + ts = pd.to_datetime('now', utc=True).tz_convert('Asia/Singapore') + result = pd.DataFrame({'end_time': [ts], 'id': [1]}) + result['max_end_time'] = result.groupby('id').end_time.transform(max) + expected = pd.DataFrame([[ts, 1, ts]], columns=['end_time', 'id', + 'max_end_time']) + tm.assert_frame_equal(result, expected) From 3c5fd19b9dd475ce942ffc5dbce40d65d71ccb71 Mon Sep 17 00:00:00 2001 From: Matt Roeschke <mroeschke@housecanary.com> Date: Sun, 10 Feb 2019 23:00:25 -0800 Subject: [PATCH 2/5] Add whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4032dc20b2e19..309a9e336f92f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -180,6 +180,7 @@ Reshaping - Bug in :func:`pandas.merge` adds a string of ``None`` if ``None`` is assigned in suffixes instead of remain the column name as-is (:issue:`24782`). - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) +- Bug in :meth:`pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - From 032e201693ffb80086a6b33a041d6e91832ff20d Mon Sep 17 00:00:00 2001 From: Matt Roeschke <mroeschke@housecanary.com> Date: Mon, 11 Feb 2019 09:54:42 -0800 Subject: [PATCH 3/5] Move whatsnew to 0.24.2 --- doc/source/whatsnew/v0.24.2.rst | 2 +- doc/source/whatsnew/v0.25.0.rst | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index b0f287cf0b9f6..0526ae77f87cb 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -76,7 +76,7 @@ Bug Fixes **Reshaping** -- +- Bug in :meth:`pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - - diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 309a9e336f92f..4032dc20b2e19 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -180,7 +180,6 @@ Reshaping - Bug in :func:`pandas.merge` adds a string of ``None`` if ``None`` is assigned in suffixes instead of remain the column name as-is (:issue:`24782`). - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) -- Bug in :meth:`pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - From ab34a5ce6a79d11560a91bd0b5cfd8cadf99dc06 Mon Sep 17 00:00:00 2001 From: Matt Roeschke <mroeschke@housecanary.com> Date: Mon, 11 Feb 2019 13:48:12 -0800 Subject: [PATCH 4/5] Add more tests --- pandas/tests/groupby/test_transform.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 52aa0e6da909d..19de213903f0a 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -836,11 +836,12 @@ def demean_rename(x): tm.assert_series_equal(result_single, expected['value']) -def test_groupby_transform_timezone_column(): +@pytest.mark.parametrize('func', [min, max, np.min, np.max]) +def test_groupby_transform_timezone_column(func): # GH 24198 ts = pd.to_datetime('now', utc=True).tz_convert('Asia/Singapore') result = pd.DataFrame({'end_time': [ts], 'id': [1]}) - result['max_end_time'] = result.groupby('id').end_time.transform(max) + result['max_end_time'] = result.groupby('id').end_time.transform(func) expected = pd.DataFrame([[ts, 1, ts]], columns=['end_time', 'id', 'max_end_time']) tm.assert_frame_equal(result, expected) From dfe61248308350c09bcaa4484ec38372a1e1fcd9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke <mroeschke@housecanary.com> Date: Mon, 11 Feb 2019 21:12:58 -0800 Subject: [PATCH 5/5] Add first last --- pandas/tests/groupby/test_transform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 19de213903f0a..b645073fcf72a 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -836,7 +836,7 @@ def demean_rename(x): tm.assert_series_equal(result_single, expected['value']) -@pytest.mark.parametrize('func', [min, max, np.min, np.max]) +@pytest.mark.parametrize('func', [min, max, np.min, np.max, 'first', 'last']) def test_groupby_transform_timezone_column(func): # GH 24198 ts = pd.to_datetime('now', utc=True).tz_convert('Asia/Singapore')