Skip to content

Commit 26aa99c

Browse files
committed
Improve MultiIndex label rename checks
1 parent 1f622e2 commit 26aa99c

File tree

4 files changed

+60
-12
lines changed

4 files changed

+60
-12
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ Other
285285
^^^^^
286286
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
287287
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
288+
- Fixed bug in :meth:`DataFrame.rename` where the checks performed for ``errors="raise"`` were not consistent with the actual transformation applied (:issue:`55169`). The logic change is accompanied by improved documentation, a new test, and a more descriptive ``KeyError`` message when a tuple label rename is attempted across :class:`MultiIndex` levels
288289

289290
.. ***DO NOT USE THIS SECTION***
290291

pandas/core/frame.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5400,6 +5400,11 @@ def rename(
54005400
level : int or level name, default None
54015401
In case of a MultiIndex, only rename labels in the specified
54025402
level.
5403+
5404+
.. note::
5405+
Labels are renamed individually, not as tuples spanning
5406+
MultiIndex levels.
5407+
54035408
errors : {'ignore', 'raise'}, default 'ignore'
54045409
If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`,
54055410
or `columns` contains labels that are not present in the Index

pandas/core/generic.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,18 +1035,36 @@ def _rename(
10351035

10361036
# GH 13473
10371037
if not callable(replacements):
1038-
if ax._is_multi and level is not None:
1039-
indexer = ax.get_level_values(level).get_indexer_for(replacements)
1040-
else:
1041-
indexer = ax.get_indexer_for(replacements)
1042-
1043-
if errors == "raise" and len(indexer[indexer == -1]):
1044-
missing_labels = [
1045-
label
1046-
for index, label in enumerate(replacements)
1047-
if indexer[index] == -1
1048-
]
1049-
raise KeyError(f"{missing_labels} not found in axis")
1038+
if errors == "raise":
1039+
missing_labels = []
1040+
for replacement in replacements:
1041+
if ax._is_multi:
1042+
indexers = [
1043+
ax.get_level_values(i).get_indexer_for([replacement])
1044+
for i in range(ax.nlevels)
1045+
if i == level or level is None
1046+
]
1047+
else:
1048+
indexers = [ax.get_indexer_for([replacement])]
1049+
1050+
found_anywhere = any(any(indexer != -1) for indexer in indexers)
1051+
if not found_anywhere:
1052+
missing_labels.append(replacement)
1053+
1054+
if len(missing_labels) > 0:
1055+
error = f"{missing_labels} not found in axis"
1056+
if ax._is_multi:
1057+
tuple_rename_tried = any(
1058+
type(label) is tuple and label in ax
1059+
for label in missing_labels
1060+
)
1061+
if tuple_rename_tried:
1062+
error += (
1063+
". Please provide individual labels for "
1064+
"replacement, and not tuples across "
1065+
"MultiIndex levels"
1066+
)
1067+
raise KeyError(error)
10501068

10511069
new_index = ax._transform_index(f, level=level)
10521070
result._set_axis_nocheck(new_index, axis=axis_no, inplace=True)

pandas/tests/frame/methods/test_rename.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,16 @@ def test_rename_multiindex(self):
164164
renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
165165
tm.assert_index_equal(renamed.index, new_index)
166166

167+
def test_rename_multiindex_with_checks(self):
168+
df = DataFrame({("a", "count"): [1, 2], ("a", "sum"): [3, 4]})
169+
renamed = df.rename(
170+
columns={"a": "b", "count": "number_of", "sum": "total"}, errors="raise"
171+
)
172+
173+
new_columns = MultiIndex.from_tuples([("b", "number_of"), ("b", "total")])
174+
175+
tm.assert_index_equal(renamed.columns, new_columns)
176+
167177
def test_rename_nocopy(self, float_frame):
168178
renamed = float_frame.rename(columns={"C": "foo"}, copy=False)
169179

@@ -221,6 +231,20 @@ def test_rename_errors_raises(self):
221231
with pytest.raises(KeyError, match="'E'] not found in axis"):
222232
df.rename(columns={"A": "a", "E": "e"}, errors="raise")
223233

234+
def test_rename_error_raised_for_label_across_multiindex_levels(self):
235+
df = DataFrame([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
236+
df = df.groupby("a").agg({"b": ("count", "sum")})
237+
with pytest.raises(
238+
KeyError,
239+
match=(
240+
"\\[\\('b', 'count'\\)\\] not found "
241+
"in axis\\. Please provide individual "
242+
"labels for replacement, and not "
243+
"tuples across MultiIndex levels"
244+
),
245+
):
246+
df.rename(columns={("b", "count"): "new"}, errors="raise")
247+
224248
@pytest.mark.parametrize(
225249
"mapper, errors, expected_columns",
226250
[

0 commit comments

Comments
 (0)