From 26b62de6feea023daf4ab97656af153490775e20 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 3 Dec 2020 16:19:00 +0800 Subject: [PATCH 1/2] fix-apply --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_apply.py | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1f8fa1e2072fd..1f7985dfb5bff 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -734,6 +734,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) - Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. 
"shift") (:issue:`36308`) +- Bug in :meth:`.DataFrameGroupBy.apply` dropped values on the ``nan`` group when returning an object with the same axes as the original frame (:issue:`38227`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7c97725f1264c..b0101be0614f4 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1212,7 +1212,7 @@ def reset_identity(values): if not not_indexed_same: result = concat(values, axis=self.axis) - ax = self._selected_obj._get_axis(self.axis) + ax = self.filter(lambda x:True).axes[self.axis] # this is a very unfortunate situation # we can't use reindex to restore the original order diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b2074dcb08c95..975cebe16dc55 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1087,3 +1087,25 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): tm.assert_frame_equal(by_cols, by_rows.T) tm.assert_frame_equal(by_cols, df) + + +def test_apply_dropna_with_indexed_same(): + # GH 38227 + + df = DataFrame( + { + "col": [1, 2, 3, 4, 5], + "group": ["a", np.nan, np.nan, "b", "b"], + }, + index=list("xxyxz"), + ) + result = df.groupby("group").apply(lambda x: x) + expected = DataFrame( + { + "col": [1, 4, 5], + "group": ["a", "b", "b"], + }, + index=list("xxz"), + ) + + tm.assert_frame_equal(result, expected) From a7cb11bc383e3fe0f474305472e7bd8ed5b23712 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Thu, 3 Dec 2020 16:24:00 +0800 Subject: [PATCH 2/2] Update groupby.py --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b0101be0614f4..c5a809a56ccae 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1212,7 +1212,7 @@ def reset_identity(values): if not not_indexed_same: result = 
concat(values, axis=self.axis) - ax = self.filter(lambda x:True).axes[self.axis] + ax = self.filter(lambda x: True).axes[self.axis] # this is a very unfortunate situation # we can't use reindex to restore the original order