Skip to content

Commit 9857248

Browse files
dubourg authored and jorisvandenbossche committed
[Backport #14449] Type codes and categories as lists instead of tuples in _factorize_from_iterables (fixes #14438)
(cherry picked from commit 0b6946b)
1 parent a1e73ee commit 9857248

File tree

3 files changed

+24
-5
lines changed

3 files changed

+24
-5
lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Bug Fixes
4545

4646
- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
4747
- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`)
48+
- Bug in ``pd.concat`` with dataframes heterogeneous in length and tuple ``keys`` (:issue:`14438`)
4849
- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`)
4950
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
5051
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`)

pandas/core/categorical.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -2055,14 +2055,14 @@ def _factorize_from_iterables(iterables):
20552055
20562056
Returns
20572057
-------
2058-
codes_tuple : tuple of ndarrays
2059-
categories_tuple : tuple of Indexes
2058+
codes_list : list of ndarrays
2059+
categories_list : list of Indexes
20602060
20612061
Notes
20622062
-----
20632063
See `_factorize_from_iterable` for more info.
20642064
"""
20652065
if len(iterables) == 0:
2066-
# For consistency, it should return a list of 2 tuples.
2067-
return [(), ()]
2068-
return lzip(*[_factorize_from_iterable(it) for it in iterables])
2066+
# For consistency, it should return a list of 2 lists.
2067+
return [[], []]
2068+
return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))

pandas/tests/frame/test_combine_concat.py

+18
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,24 @@ def test_concat_multiple_tzs(self):
5757
expected = DataFrame(dict(time=[ts2, ts3]))
5858
assert_frame_equal(results, expected)
5959

60+
def test_concat_tuple_keys(self):
61+
# GH 14438
62+
df1 = pd.DataFrame(np.ones((2, 2)), columns=list('AB'))
63+
df2 = pd.DataFrame(np.ones((3, 2)) * 2, columns=list('AB'))
64+
results = pd.concat((df1, df2), keys=[('bee', 'bah'), ('bee', 'boo')])
65+
expected = pd.DataFrame(
66+
{'A': {('bee', 'bah', 0): 1.0,
67+
('bee', 'bah', 1): 1.0,
68+
('bee', 'boo', 0): 2.0,
69+
('bee', 'boo', 1): 2.0,
70+
('bee', 'boo', 2): 2.0},
71+
'B': {('bee', 'bah', 0): 1.0,
72+
('bee', 'bah', 1): 1.0,
73+
('bee', 'boo', 0): 2.0,
74+
('bee', 'boo', 1): 2.0,
75+
('bee', 'boo', 2): 2.0}})
76+
assert_frame_equal(results, expected)
77+
6078
def test_append_series_dict(self):
6179
df = DataFrame(np.random.randn(5, 4),
6280
columns=['foo', 'bar', 'baz', 'qux'])

0 commit comments

Comments
 (0)