From 5473e1063362837ce3e7d15bec8d72c144b72606 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 16 Apr 2021 22:24:08 +0200 Subject: [PATCH 1/3] Bug in loc returning multiindex in wrong order with duplicated indexer --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexes/multi.py | 1 + pandas/tests/indexing/multiindex/test_loc.py | 18 ++++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1c7942dfedafa..36cd558ce36fb 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -708,6 +708,7 @@ Indexing - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) - Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`) - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`) +- Bug in :meth:`DataFrame.loc` returning a :class:`MultiIndex` in the wrong order if an indexer has duplicates (:issue:`40978`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3305610a4022e..6b709124b4e88 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3440,6 +3440,7 @@ def _reorder_indexer( new_order = np.arange(n)[indexer] elif is_list_like(k): # Generate a map with all level codes as sorted initially + k = algos.unique(k) key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len( self.levels[i] ) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 96d2c246dd0ee..0ba13664a149f 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -764,6 +764,24 @@ def test_loc_getitem_index_differently_ordered_slice_none(): tm.assert_frame_equal(result, expected) +def 
test_loc_getitem_index_differently_ordered_slice_none_duplicates(): + # GH#40978 + df = DataFrame( + [1] * 8, + index=MultiIndex.from_tuples( + [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)] + ), + columns=["a"], + ) + result = df.loc[(slice(None), df.index.get_level_values(1)), :] + expected = DataFrame( + [1] * 8, + index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]], + columns=["a"], + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_drops_levels_for_one_row_dataframe(): # GH#10521 mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"]) From 773807497ccb8552922d8c21fe3fd0c5def86151 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 16 Apr 2021 22:26:13 +0200 Subject: [PATCH 2/3] Parametrize test --- pandas/tests/indexing/multiindex/test_loc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 0ba13664a149f..8728384ec5d67 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -764,7 +764,8 @@ def test_loc_getitem_index_differently_ordered_slice_none(): tm.assert_frame_equal(result, expected) -def test_loc_getitem_index_differently_ordered_slice_none_duplicates(): +@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]]) +def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer): # GH#40978 df = DataFrame( [1] * 8, @@ -773,7 +774,7 @@ def test_loc_getitem_index_differently_ordered_slice_none_duplicates(): ), columns=["a"], ) - result = df.loc[(slice(None), df.index.get_level_values(1)), :] + result = df.loc[(slice(None), indexer), :] expected = DataFrame( [1] * 8, index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]], From 7f64bd110ccfe856db093b7093fb4bce63e367a9 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 24 May 2021 20:20:18 +0200 Subject: [PATCH 3/3] Add second test --- 
pandas/tests/indexing/multiindex/test_loc.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 24f47ea5d530f..cf1ab3eadc7a4 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -782,6 +782,9 @@ def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer): ) tm.assert_frame_equal(result, expected) + result = df.loc[df.index.isin(indexer, level=1), :] + tm.assert_frame_equal(result, df) + def test_loc_getitem_drops_levels_for_one_row_dataframe(): # GH#10521