From ce595b9f3488802a5550edb6429df7d0516208b2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 20 Oct 2016 19:30:46 -0400 Subject: [PATCH] BUG: incorrect broadcasting that could cause dtype coercion in a groupby-transform closes #14457 --- doc/source/whatsnew/v0.19.1.txt | 4 ++++ pandas/core/groupby.py | 21 +++++++++++++++------ pandas/tests/test_groupby.py | 12 ++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 8bebe5e782e3c..96ea1c4f1ea20 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -43,9 +43,13 @@ Bug Fixes - Bug in string indexing against data with ``object`` ``Index`` may raise ``AttributeError`` (:issue:`14424`) - Corrrecly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`) + - Bug in ``RangeIndex.intersection`` when result is a empty set (:issue:`14364`). - Bug in union of differences from a ``DatetimeIndex`; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`) +- Bug in groupby-transform broadcasting that could cause incorrect dtype coercion (:issue:`14457`) + + - Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`). 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5223c0ac270f3..4c200e28d9b47 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3454,7 +3454,6 @@ def _transform_general(self, func, *args, **kwargs): from pandas.tools.merge import concat applied = [] - obj = self._obj_with_exclusions gen = self.grouper.get_iterator(obj, axis=self.axis) fast_path, slow_path = self._define_paths(func, *args, **kwargs) @@ -3475,14 +3474,24 @@ def _transform_general(self, func, *args, **kwargs): else: res = path(group) - # broadcasting if isinstance(res, Series): - if res.index.is_(obj.index): - group.T.values[:] = res + + # we need to broadcast across the + # other dimension; this will preserve dtypes + # GH14457 + if not np.prod(group.shape): + continue + elif res.index.is_(obj.index): + r = concat([res] * len(group.columns), axis=1) + r.columns = group.columns + r.index = group.index else: - group.values[:] = res + r = DataFrame( + np.concatenate([res.values] * len(group.index) + ).reshape(group.shape), + columns=group.columns, index=group.index) - applied.append(group) + applied.append(r) else: applied.append(res) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index f3791ee1d5c91..f64e1966c0601 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1336,6 +1336,18 @@ def nsum(x): for result in results: assert_series_equal(result, expected, check_names=False) + def test_transform_coercion(self): + + # 14457 + # when we are transforming be sure to not coerce + # via assignment + df = pd.DataFrame(dict(A=['a', 'a'], B=[0, 1])) + g = df.groupby('A') + + expected = g.transform(np.mean) + result = g.transform(lambda x: np.mean(x)) + assert_frame_equal(result, expected) + def test_with_na(self): index = Index(np.arange(10))