diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index cf3dd1b0e3226..495ed86f2cc4e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -787,6 +787,7 @@ Indexing - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) - Bug in setting numeric values into a into a boolean-dtypes :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`) - Bug in :meth:`DataFrame.__setitem__` and :meth:`DataFrame.iloc.__setitem__` raising ``ValueError`` when trying to index with a row-slice and setting a list as values (:issue:`40440`) +- Bug in :meth:`DataFrame.loc` not raising ``KeyError`` when the key was not found in a :class:`MultiIndex` whose levels contain more values than are actually used (:issue:`41170`) - Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`) - Bug in :meth:`DataFrame.loc` incorrectly matching non-boolean index elements (:issue:`20432`) - Bug in :meth:`Series.__delitem__` with ``ExtensionDtype`` incorrectly casting to ``ndarray`` (:issue:`40386`) @@ -806,6 +807,7 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in result (:issue:`38623`) - Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when :class:`MultiIndex` containing ``NaN`` even when they are differently ordered (:issue:`38439`) - Bug in :meth:`MultiIndex.intersection` always returning empty when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`MultiIndex.reindex` raising ``ValueError`` with an empty :class:`MultiIndex` when indexing only a specific level (:issue:`41170`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 84f1245299d53..3785ade4688d2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4229,7 +4229,8 @@ def 
_get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray: else: # tie out the order with other if level == 0: # outer most level, take the fast route - ngroups = 1 + new_lev_codes.max() + max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max() + ngroups = 1 + max_new_lev left_indexer, counts = libalgos.groupsort_indexer( new_lev_codes, ngroups ) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a68238af003e4..c1295a98bf357 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -72,6 +72,7 @@ from pandas.core.arrays import Categorical from pandas.core.arrays.categorical import factorize_from_iterables import pandas.core.common as com +from pandas.core.indexers import is_empty_indexer import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, @@ -2634,6 +2635,10 @@ def _convert_listlike_indexer(self, keyarr): mask = check == -1 if mask.any(): raise KeyError(f"{keyarr[mask]} not in index") + elif is_empty_indexer(indexer, keyarr): + # We get here when levels still contain values which are not + # actually in Index anymore + raise KeyError(f"{keyarr} not in index") return indexer, keyarr diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index 5ed34cd766bce..3b0fcd72f3123 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -104,3 +104,14 @@ def test_reindex_non_unique(): msg = "cannot handle a non-unique multi-index!" 
with pytest.raises(ValueError, match=msg): a.reindex(new_idx) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + idx = MultiIndex.from_arrays(values) + result, result_indexer = idx.reindex(np.array(["b"]), level=0) + expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []]) + expected_indexer = np.array([], dtype=result_indexer.dtype) + tm.assert_index_equal(result, expected) + tm.assert_numpy_array_equal(result_indexer, expected_indexer) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 11391efde4956..a1c646b4dc0b5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1624,6 +1624,13 @@ def test_loc_getitem_preserves_index_level_category_dtype(self): result = df.loc[["a"]].index.levels[0] tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("lt_value", [30, 10]) + def test_loc_multiindex_levels_contain_values_not_in_index_anymore(self, lt_value): + # GH#41170 + df = DataFrame({"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]]) + with pytest.raises(KeyError, match=r"\['b'\] not in index"): + df.loc[df["a"] < lt_value, :].loc[["b"], :] + class TestLocSetitemWithExpansion: @pytest.mark.slow diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 8e54cbeb313c4..36d3971d10a3d 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -4,6 +4,7 @@ from pandas import ( Categorical, Index, + MultiIndex, NaT, Period, PeriodIndex, @@ -345,3 +346,16 @@ def test_reindex_periodindex_with_object(p_values, o_values, values, expected_va result = ser.reindex(object_index) expected = Series(expected_values, index=object_index) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]]) +def test_reindex_empty_with_level(values): + # GH41170 + ser = 
Series( + range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object" + ) + result = ser.reindex(np.array(["b"]), level=0) + expected = Series( + index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object" + ) + tm.assert_series_equal(result, expected)