Skip to content

Commit 24d7c06

Browse files
charlesdong1991 authored and jreback committed
BUG: Fix MultiIndexed unstack failures at tuple names (#30943)
1 parent 469b4b7 commit 24d7c06

File tree

5 files changed

+200
-60
lines changed

5 files changed

+200
-60
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ Reshaping
141141

142142
-
143143
- Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`)
144+
- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` where unstacking failed when a level name in MultiIndexed data was a tuple (:issue:`19966`)
144145
- Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`)
145146
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
146147
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)

pandas/core/reshape/reshape.py

+4
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,10 @@ def _unstack_multiple(data, clocs, fill_value=None):
317317

318318
index = data.index
319319

320+
# GH 19966 If clocs is itself one of the index's level names (e.g. a tuple
321+
# name), treat it as a single level rather than as a sequence of levels
322+
if clocs in index.names:
323+
clocs = [clocs]
320324
clocs = [index._get_level_number(i) for i in clocs]
321325

322326
rlocs = [i for i in range(index.nlevels) if i not in clocs]

pandas/tests/frame/test_reshape.py

+74
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,80 @@ def test_unstack_fill_frame_categorical(self):
336336
)
337337
tm.assert_frame_equal(result, expected)
338338

339+
def test_unstack_tuplename_in_multiindex(self):
340+
# GH 19966
341+
idx = pd.MultiIndex.from_product(
342+
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
343+
)
344+
df = pd.DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx)
345+
result = df.unstack(("A", "a"))
346+
347+
expected = pd.DataFrame(
348+
[[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]],
349+
columns=pd.MultiIndex.from_tuples(
350+
[
351+
("d", "a"),
352+
("d", "b"),
353+
("d", "c"),
354+
("e", "a"),
355+
("e", "b"),
356+
("e", "c"),
357+
],
358+
names=[None, ("A", "a")],
359+
),
360+
index=pd.Index([1, 2, 3], name=("B", "b")),
361+
)
362+
tm.assert_frame_equal(result, expected)
363+
364+
@pytest.mark.parametrize(
365+
"unstack_idx, expected_values, expected_index, expected_columns",
366+
[
367+
(
368+
("A", "a"),
369+
[[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]],
370+
pd.MultiIndex.from_tuples(
371+
[(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
372+
),
373+
pd.MultiIndex.from_tuples(
374+
[("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")],
375+
names=[None, ("A", "a")],
376+
),
377+
),
378+
(
379+
(("A", "a"), "B"),
380+
[[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]],
381+
pd.Index([3, 4], name="C"),
382+
pd.MultiIndex.from_tuples(
383+
[
384+
("d", "a", 1),
385+
("d", "a", 2),
386+
("d", "b", 1),
387+
("d", "b", 2),
388+
("e", "a", 1),
389+
("e", "a", 2),
390+
("e", "b", 1),
391+
("e", "b", 2),
392+
],
393+
names=[None, ("A", "a"), "B"],
394+
),
395+
),
396+
],
397+
)
398+
def test_unstack_mixed_type_name_in_multiindex(
399+
self, unstack_idx, expected_values, expected_index, expected_columns
400+
):
401+
# GH 19966
402+
idx = pd.MultiIndex.from_product(
403+
[["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
404+
)
405+
df = pd.DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx)
406+
result = df.unstack(unstack_idx)
407+
408+
expected = pd.DataFrame(
409+
expected_values, columns=expected_columns, index=expected_index,
410+
)
411+
tm.assert_frame_equal(result, expected)
412+
339413
def test_unstack_preserve_dtypes(self):
340414
# Checks fix for #11847
341415
df = pd.DataFrame(

pandas/tests/series/test_analytics.py

+1-60
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas.util._test_decorators as td
77

88
import pandas as pd
9-
from pandas import DataFrame, MultiIndex, Series
9+
from pandas import DataFrame, Series
1010
import pandas._testing as tm
1111

1212

@@ -160,65 +160,6 @@ def test_is_monotonic(self):
160160
assert s.is_monotonic is False
161161
assert s.is_monotonic_decreasing is True
162162

163-
def test_unstack(self):
164-
165-
index = MultiIndex(
166-
levels=[["bar", "foo"], ["one", "three", "two"]],
167-
codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
168-
)
169-
170-
s = Series(np.arange(4.0), index=index)
171-
unstacked = s.unstack()
172-
173-
expected = DataFrame(
174-
[[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
175-
index=["bar", "foo"],
176-
columns=["one", "three", "two"],
177-
)
178-
179-
tm.assert_frame_equal(unstacked, expected)
180-
181-
unstacked = s.unstack(level=0)
182-
tm.assert_frame_equal(unstacked, expected.T)
183-
184-
index = MultiIndex(
185-
levels=[["bar"], ["one", "two", "three"], [0, 1]],
186-
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
187-
)
188-
s = Series(np.random.randn(6), index=index)
189-
exp_index = MultiIndex(
190-
levels=[["one", "two", "three"], [0, 1]],
191-
codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
192-
)
193-
expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
194-
unstacked = s.unstack(0).sort_index()
195-
tm.assert_frame_equal(unstacked, expected)
196-
197-
# GH5873
198-
idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
199-
ts = pd.Series([1, 2], index=idx)
200-
left = ts.unstack()
201-
right = DataFrame(
202-
[[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
203-
)
204-
tm.assert_frame_equal(left, right)
205-
206-
idx = pd.MultiIndex.from_arrays(
207-
[
208-
["cat", "cat", "cat", "dog", "dog"],
209-
["a", "a", "b", "a", "b"],
210-
[1, 2, 1, 1, np.nan],
211-
]
212-
)
213-
ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
214-
right = DataFrame(
215-
[[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
216-
columns=["cat", "dog"],
217-
)
218-
tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
219-
right.index = pd.MultiIndex.from_tuples(tpls)
220-
tm.assert_frame_equal(ts.unstack(level=0), right)
221-
222163
@pytest.mark.parametrize("func", [np.any, np.all])
223164
@pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())])
224165
@td.skip_if_np_lt("1.15")

pandas/tests/series/test_reshaping.py

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import DataFrame, MultiIndex, Series
6+
import pandas._testing as tm
7+
8+
9+
def test_unstack():
10+
index = MultiIndex(
11+
levels=[["bar", "foo"], ["one", "three", "two"]],
12+
codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
13+
)
14+
15+
s = Series(np.arange(4.0), index=index)
16+
unstacked = s.unstack()
17+
18+
expected = DataFrame(
19+
[[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
20+
index=["bar", "foo"],
21+
columns=["one", "three", "two"],
22+
)
23+
24+
tm.assert_frame_equal(unstacked, expected)
25+
26+
unstacked = s.unstack(level=0)
27+
tm.assert_frame_equal(unstacked, expected.T)
28+
29+
index = MultiIndex(
30+
levels=[["bar"], ["one", "two", "three"], [0, 1]],
31+
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
32+
)
33+
s = Series(np.random.randn(6), index=index)
34+
exp_index = MultiIndex(
35+
levels=[["one", "two", "three"], [0, 1]],
36+
codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
37+
)
38+
expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
39+
unstacked = s.unstack(0).sort_index()
40+
tm.assert_frame_equal(unstacked, expected)
41+
42+
# GH5873
43+
idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
44+
ts = pd.Series([1, 2], index=idx)
45+
left = ts.unstack()
46+
right = DataFrame(
47+
[[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
48+
)
49+
tm.assert_frame_equal(left, right)
50+
51+
idx = pd.MultiIndex.from_arrays(
52+
[
53+
["cat", "cat", "cat", "dog", "dog"],
54+
["a", "a", "b", "a", "b"],
55+
[1, 2, 1, 1, np.nan],
56+
]
57+
)
58+
ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
59+
right = DataFrame(
60+
[[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
61+
columns=["cat", "dog"],
62+
)
63+
tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
64+
right.index = pd.MultiIndex.from_tuples(tpls)
65+
tm.assert_frame_equal(ts.unstack(level=0), right)
66+
67+
68+
def test_unstack_tuplename_in_multiindex():
69+
# GH 19966
70+
idx = pd.MultiIndex.from_product(
71+
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
72+
)
73+
ser = pd.Series(1, index=idx)
74+
result = ser.unstack(("A", "a"))
75+
76+
expected = pd.DataFrame(
77+
[[1, 1, 1], [1, 1, 1], [1, 1, 1]],
78+
columns=pd.MultiIndex.from_tuples(
79+
[("a",), ("b",), ("c",)], names=[("A", "a")],
80+
),
81+
index=pd.Index([1, 2, 3], name=("B", "b")),
82+
)
83+
tm.assert_frame_equal(result, expected)
84+
85+
86+
@pytest.mark.parametrize(
87+
"unstack_idx, expected_values, expected_index, expected_columns",
88+
[
89+
(
90+
("A", "a"),
91+
[[1, 1], [1, 1], [1, 1], [1, 1]],
92+
pd.MultiIndex.from_tuples(
93+
[(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
94+
),
95+
pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
96+
),
97+
(
98+
(("A", "a"), "B"),
99+
[[1, 1, 1, 1], [1, 1, 1, 1]],
100+
pd.Index([3, 4], name="C"),
101+
pd.MultiIndex.from_tuples(
102+
[("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
103+
),
104+
),
105+
],
106+
)
107+
def test_unstack_mixed_type_name_in_multiindex(
108+
unstack_idx, expected_values, expected_index, expected_columns
109+
):
110+
# GH 19966
111+
idx = pd.MultiIndex.from_product(
112+
[["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
113+
)
114+
ser = pd.Series(1, index=idx)
115+
result = ser.unstack(unstack_idx)
116+
117+
expected = pd.DataFrame(
118+
expected_values, columns=expected_columns, index=expected_index,
119+
)
120+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)