diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2b147f948adb1..cbbbfff797ac4 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -284,6 +284,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) +- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 2d37121d28308..d7eaaca5ac83a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -330,7 +330,8 @@ def __init__( self._group_index = CategoricalIndex( Categorical.from_codes( codes=codes, categories=categories, ordered=self.grouper.ordered - ) + ), + name=self.name, ) # we are done diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index fcc0aa3b1c015..490ecaab03dab 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -674,7 +674,7 @@ def test_preserve_categories(): # ordered=True df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=True)}) - index = CategoricalIndex(categories, categories, ordered=True) + index = CategoricalIndex(categories, categories, ordered=True, name="A") tm.assert_index_equal( df.groupby("A", sort=True, observed=False).first().index, index ) @@ -684,8 +684,8 @@ def test_preserve_categories(): # ordered=False df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=False)}) - sort_index = CategoricalIndex(categories, categories, ordered=False) - nosort_index = CategoricalIndex(list("bac"), list("bac"), ordered=False) + sort_index = CategoricalIndex(categories, categories, ordered=False, name="A") + nosort_index = CategoricalIndex(list("bac"), list("bac"), ordered=False, name="A") tm.assert_index_equal( df.groupby("A", sort=True, observed=False).first().index, sort_index ) @@ -1193,3 +1193,17 @@ def test_groupby_categorical_axis_1(code): result = df.groupby(cat, axis=1).mean() expected = df.T.groupby(cat, axis=0).mean().T assert_frame_equal(result, expected) + + +def test_groupby_cat_preserves_structure(observed): + # GH 28787 + df = DataFrame([("Bob", 1), ("Greg", 2)], columns=["Name", "Item"]) + expected = df.copy() + + result = ( + df.groupby("Name", observed=observed) + .agg(pd.DataFrame.sum, skipna=True) + .reset_index() + ) + + assert_frame_equal(result, expected)