Skip to content

Commit d7640e8

Browse files
author
Eric Kisslinger
committed
BUG: Fix groupby over a CategoricalIndex in axis=1
Closes GH18432. Add multi-index columns test to test_groupby_categorical_columns_index(); add whatsnew for GH18432 bug fix; fix ValueError text for GH18432 bug fix; update whatsnew text; use kwargs instead of positional format params; move test_groupby_categorical_columns_index() to pandas/tests/groupby/test_grouping.py.
1 parent 262e8ff commit d7640e8

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

doc/source/whatsnew/v0.21.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,7 @@ Categorical
137137
- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`)
138138
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
139139
- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`)
140+
- Bug in ``DataFrame.groupby(axis=1)`` incorrectly raising a ``ValueError`` when grouping over a ``CategoricalIndex`` whose length differs from the number of rows (:issue:`18432`)
140141

141142
String
142143
^^^^^^

pandas/core/groupby.py

+5-3
Original file line number | Diff line number | Diff line change
@@ -2859,9 +2859,11 @@ def is_in_obj(gpr):
28592859
else:
28602860
in_axis, name = False, None
28612861

2862-
if is_categorical_dtype(gpr) and len(gpr) != len(obj):
2863-
raise ValueError("Categorical dtype grouper must "
2864-
"have len(grouper) == len(data)")
2862+
if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
2863+
raise ValueError(
2864+
("Length of grouper ({len_gpr}) and axis ({len_axis})"
2865+
" must be same length"
2866+
.format(len_gpr=len(gpr), len_axis=obj.shape[axis])))
28652867

28662868
# create the Grouping
28672869
# allow us to pass the actual Grouping as the gpr

pandas/tests/groupby/test_grouping.py

+32-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
from warnings import catch_warnings
88
from pandas import (date_range, Timestamp,
9-
Index, MultiIndex, DataFrame, Series)
9+
Index, MultiIndex, DataFrame, Series, CategoricalIndex)
1010
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
1111
assert_series_equal, assert_almost_equal)
1212
from pandas.compat import lrange, long
@@ -251,6 +251,37 @@ def test_groupby_levels_and_columns(self):
251251
by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64)
252252
tm.assert_frame_equal(by_levels, by_columns)
253253

254+
def test_groupby_categorical_index_and_columns(self):
255+
# GH18432
256+
columns = ['A', 'B', 'A', 'B']
257+
categories = ['B', 'A']
258+
cat_columns = CategoricalIndex(columns,
259+
categories=categories,
260+
ordered=True)
261+
data = np.random.rand(5, len(columns))
262+
df = DataFrame(data, columns=cat_columns)
263+
result = df.groupby(axis=1, level=0).sum()
264+
df = DataFrame(data, columns=columns)
265+
expected = df.groupby(axis=1, level=0).sum()
266+
expected_cat_columns = CategoricalIndex(expected.columns,
267+
categories=categories,
268+
ordered=True)
269+
expected.columns = expected_cat_columns
270+
expected = expected.sort_index(axis=1)
271+
assert_frame_equal(result, expected)
272+
273+
# test transposed version
274+
df = DataFrame(data.T, index=cat_columns)
275+
result = df.groupby(axis=0, level=0).sum()
276+
df = DataFrame(data.T, index=columns)
277+
expected = df.groupby(axis=0, level=0).sum()
278+
expected_cat_index = CategoricalIndex(expected.index,
279+
categories=categories,
280+
ordered=True)
281+
expected.index = expected_cat_index
282+
expected = expected.sort_index()
283+
assert_frame_equal(result, expected)
284+
254285
def test_grouper_getting_correct_binner(self):
255286

256287
# GH 10063

0 commit comments

Comments
 (0)