From 02116880d656e12ff39e08897a94d43d43196a14 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 5 May 2021 22:19:59 +0200 Subject: [PATCH 1/8] Temp changes --- pandas/core/indexes/multi.py | 7 +++++++ pandas/tests/indexing/test_loc.py | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a68238af003e4..9f24903ec3194 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -72,6 +72,7 @@ from pandas.core.arrays import Categorical from pandas.core.arrays.categorical import factorize_from_iterables import pandas.core.common as com +from pandas.core.indexers import is_empty_indexer import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, @@ -2555,6 +2556,8 @@ def reindex( target = self elif (indexer >= 0).all(): target = self.take(indexer) + # elif (indexer == -1).all(): + # target = self else: # hopefully? target = MultiIndex.from_tuples(target) @@ -2634,6 +2637,10 @@ def _convert_listlike_indexer(self, keyarr): mask = check == -1 if mask.any(): raise KeyError(f"{keyarr[mask]} not in index") + elif is_empty_indexer(indexer, keyarr): + # We get here when levels still contain values which are not + # actually in Index anymore + raise KeyError(f"{keyarr} not in index") return indexer, keyarr diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 11391efde4956..a1c646b4dc0b5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1624,6 +1624,13 @@ def test_loc_getitem_preserves_index_level_category_dtype(self): result = df.loc[["a"]].index.levels[0] tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("lt_value", [30, 10]) + def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value): + # GH#41170 + df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]]) + with pytest.raises(KeyError, match=r"\['b'\] not in index"): + df.loc[df["a"] < lt_value, :].loc[["b"], :] + class TestLocSetitemWithExpansion: @pytest.mark.slow From 33b3dc74a4a059cf61b28adf257086f7ce9acc18 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 6 May 2021 22:00:39 +0200 Subject: [PATCH 2/8] Fix second bug --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexes/multi.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 69af8b500f790..5aa809eaeec5f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -784,6 +784,7 @@ Indexing - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`) +- Bug in :meth:`DataFrame.loc` not raising ``KeyError`` when key was not found in :class:`MultiIndex` when levels contain more values than used (:issue:`41170`) - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) - Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`) - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9f24903ec3194..6207d2de596f5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2626,8 +2626,10 @@ def _convert_listlike_indexer(self, keyarr): # are we indexing a specific level if indexer is None and len(keyarr) and not isinstance(keyarr[0], tuple): - level = 0 - _, indexer = self.reindex(keyarr, level=level) + indexer = None + if len(self): + level = 0 + _, indexer = self.reindex(keyarr, level=level) # take all if indexer is None: From 0ccafefdf5031fa2801f98d52e7390590ff258d2 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 6 May 2021 22:39:18 +0200 Subject: [PATCH 3/8] Fix bug in reindex with empty MultiIndex --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/multi.py | 6 ++---- pandas/tests/indexes/multi/test_reindex.py | 11 +++++++++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5aa809eaeec5f..397e7c80b8c25 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -804,6 +804,7 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`) - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`) - Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`MultiIndex.reindex` raising ``ValueError`` with empty MultiIndex and indexing only a specific level (:issue:`41170`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 84f1245299d53..3785ade4688d2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4229,7 +4229,8 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray: else: # tie out the order with other if level == 0: # outer most level, take the fast route - ngroups = 1 + new_lev_codes.max() + max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max() + ngroups = 1 + max_new_lev left_indexer, counts = libalgos.groupsort_indexer( new_lev_codes, ngroups ) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6207d2de596f5..9f24903ec3194 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2626,10 +2626,8 @@ def _convert_listlike_indexer(self, keyarr): # are we indexing a specific level if indexer is None and len(keyarr) and not isinstance(keyarr[0], tuple): - indexer = None - if len(self): - level = 0 - _, indexer = self.reindex(keyarr, level=level) + level = 0 + _, indexer = self.reindex(keyarr, level=level) # take all if indexer is None: diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 5ed34cd766bce..10d93cc539f0e 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -104,3 +104,14 @@ def test_reindex_non_unique(): msg = "cannot handle a non-unique multi-index!" with pytest.raises(ValueError, match=msg): a.reindex(new_idx) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + idx = MultiIndex.from_arrays(values) + result, result_indexer = idx.reindex(np.array(["b"]), level=0) + expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) + expected_indexer = np.array([], dtype="int64") + tm.assert_index_equal(result, expected) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) From c1947e5cfe80303b398c94602a628cf22ba71e42 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 6 May 2021 22:44:11 +0200 Subject: [PATCH 4/8] Remove comment --- pandas/core/indexes/multi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9f24903ec3194..c1295a98bf357 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2556,8 +2556,6 @@ def reindex( target = self elif (indexer >= 0).all(): target = self.take(indexer) - # elif (indexer == -1).all(): - # target = self else: # hopefully? target = MultiIndex.from_tuples(target) From df66e825b06c691574d58e0faf5d473e35981005 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 6 May 2021 23:20:43 +0200 Subject: [PATCH 5/8] Change dtype --- pandas/tests/indexes/multi/test_reindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 10d93cc539f0e..b69859ac2728b 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -112,6 +112,6 @@ def test_reindex_empty_with_level(values): idx = MultiIndex.from_arrays(values) result, result_indexer = idx.reindex(np.array(["b"]), level=0) expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) - expected_indexer = np.array([], dtype="int64") + expected_indexer = np.array([], dtype="int") tm.assert_index_equal(result, expected) tm.assert_numpy_array_equal(result_indexer, expected_indexer) From 5477448e29f464c17ec27eaea38bf6a77055b8ee Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 7 May 2021 11:21:55 +0200 Subject: [PATCH 6/8] Use same dtype --- pandas/tests/indexes/multi/test_reindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index b69859ac2728b..3b0fcd72f3123 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -112,6 +112,6 @@ def test_reindex_empty_with_level(values): idx = MultiIndex.from_arrays(values) result, result_indexer = idx.reindex(np.array(["b"]), level=0) expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) - expected_indexer = np.array([], dtype="int") + expected_indexer = np.array([], dtype=result_indexer.dtype) tm.assert_index_equal(result, expected) tm.assert_numpy_array_equal(result_indexer, expected_indexer) From 9e2e8373aacf57771cd2ed9cd683d2252dd8f008 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 7 May 2021 17:58:11 +0200 Subject: [PATCH 7/8] Add test for series reindex --- pandas/tests/series/methods/test_reindex.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 8e54cbeb313c4..7400380b6af6d 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -4,6 +4,7 @@ from pandas import ( Categorical, Index, + MultiIndex, NaT, Period, PeriodIndex, @@ -345,3 +346,14 @@ def test_reindex_periodindex_with_object(p_values, o_values, values, expected_va result = ser.reindex(object_index) expected = Series(expected_values, index=object_index) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + ser = Series(range(len(values[0])), index=MultiIndex.from_arrays(values)) + result = ser.reindex(np.array(["b"]), level=0) + expected = Series( + index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype=result.dtype + ) + tm.assert_series_equal(result, expected) From 338016d444411fef2ae1c9308b0a1d48871bb2be Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 7 May 2021 23:57:52 +0200 Subject: [PATCH 8/8] Fix deprecation warning --- pandas/tests/series/methods/test_reindex.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 7400380b6af6d..36d3971d10a3d 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -351,9 +351,11 @@ def test_reindex_periodindex_with_object(p_values, o_values, values, expected_va @pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) def test_reindex_empty_with_level(values): # GH41170 - ser = Series(range(len(values[0])), index=MultiIndex.from_arrays(values)) + ser = Series( + range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object" + ) result = ser.reindex(np.array(["b"]), level=0) expected = Series( - index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype=result.dtype + index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object" ) tm.assert_series_equal(result, expected)