Skip to content

Commit b65467a

Browse files
committed
pandas-dev#35028 DataFrameGroupBy.count() now returns zero for missing categories when grouping by multiple categoricals
1 parent 42fd7e7 commit b65467a

File tree

2 files changed

+6
-13
lines changed

2 files changed

+6
-13
lines changed

pandas/core/groupby/generic.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
import numpy as np
3232

3333
from pandas._libs import lib
34-
from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
34+
from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Scalar
3535
from pandas.util._decorators import Appender, Substitution, doc
3636

3737
from pandas.core.dtypes.cast import (
@@ -1788,7 +1788,9 @@ def _wrap_transformed_output(
17881788

17891789
return result
17901790

1791-
def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame:
1791+
def _wrap_agged_blocks(
1792+
self, blocks: "Sequence[Block]", items: Index, fill_value: Scalar = np.NaN
1793+
) -> DataFrame:
17921794
if not self.as_index:
17931795
index = np.arange(blocks[0].values.shape[-1])
17941796
mgr = BlockManager(blocks, axes=[items, index])
@@ -1804,7 +1806,7 @@ def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFra
18041806
if self.axis == 1:
18051807
result = result.T
18061808

1807-
return self._reindex_output(result)._convert(datetime=True)
1809+
return self._reindex_output(result, fill_value)._convert(datetime=True)
18081810

18091811
def _iterate_column_groupbys(self):
18101812
for i, colname in enumerate(self._selected_obj.columns):
@@ -1846,7 +1848,7 @@ def count(self):
18461848
)
18471849
blocks = [make_block(val, placement=loc) for val, loc in zip(counted, locs)]
18481850

1849-
return self._wrap_agged_blocks(blocks, items=data.items)
1851+
return self._wrap_agged_blocks(blocks, items=data.items, fill_value=0)
18501852

18511853
def nunique(self, dropna: bool = True):
18521854
"""

pandas/tests/groupby/test_categorical.py

-9
Original file line number | Diff line number | Diff line change
@@ -1412,15 +1412,6 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
14121412
if reduction_func == "ngroup":
14131413
pytest.skip("ngroup does not return the Categories on the index")
14141414

1415-
if reduction_func == "count": # GH 35028
1416-
mark = pytest.mark.xfail(
1417-
reason=(
1418-
"DataFrameGroupBy.count returns np.NaN for missing "
1419-
"categories, when it should return 0. See GH 35028"
1420-
)
1421-
)
1422-
request.node.add_marker(mark)
1423-
14241415
if reduction_func == "sum": # GH 31422
14251416
mark = pytest.mark.xfail(
14261417
reason=(

0 commit comments

Comments
 (0)