Skip to content
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ Reshaping

-
- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`)
- Bug in :func:`crosstab` where, when the inputs are two Series with tuple names, the output kept a dummy MultiIndex as columns (:issue:`18321`)


Sparse
^^^^^^
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,8 @@ def crosstab(
from pandas import DataFrame

df = DataFrame(data, index=common_idx)
original_df_cols = df.columns

if values is None:
df["__dummy__"] = 0
kwargs = {"aggfunc": len, "fill_value": 0}
Expand All @@ -589,7 +591,7 @@ def crosstab(
kwargs = {"aggfunc": aggfunc}

table = df.pivot_table(
"__dummy__",
["__dummy__"],
index=rownames,
columns=colnames,
margins=margins,
Expand All @@ -598,6 +600,12 @@ def crosstab(
**kwargs,
)

# GH18321: after pivoting, the columns gain an extra top level named `__dummy__`;
# drop that level so it is not carried into the later steps
if not table.empty:
cols_diff = df.columns.difference(original_df_cols)[0]
table = table[cols_diff]

# Post-process
if normalize is not False:
table = _normalize(
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2557,6 +2557,19 @@ def test_crosstab_tuple_name(self, names):
result = pd.crosstab(s1, s2)
tm.assert_frame_equal(result, expected)

def test_crosstab_both_tuple_names(self):
# GH 18321
s1 = pd.Series(range(3), name=("a", "b"))
s2 = pd.Series(range(3), name=("c", "d"))

expected = pd.DataFrame(
np.eye(3, dtype="int64"),
index=pd.Index(range(3), name=("a", "b")),
columns=pd.Index(range(3), name=("c", "d")),
)
result = crosstab(s1, s2)
tm.assert_frame_equal(result, expected)

def test_crosstab_unsorted_order(self):
df = pd.DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"])
result = pd.crosstab(df.index, [df.b, df.a])
Expand Down