diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5c39377899a20..0e68c3799efa7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -447,6 +447,7 @@ Indexing - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) - Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`) +- Bug in :meth:`Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df58593bc930c..73038bb44e236 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4568,10 +4568,7 @@ def get_value(self, series: "Series", key): ------- scalar or Series """ - if not is_scalar(key): - # if key is not a scalar, directly raise an error (the code below - # would convert to numpy arrays and raise later any way) - GH29926 - raise InvalidIndexError(key) + self._check_indexing_error(key) try: # GH 20882, 21257 @@ -4592,6 +4589,12 @@ def get_value(self, series: "Series", key): return self._get_values_for_loc(series, loc, key) + def _check_indexing_error(self, key): + if not is_scalar(key): + # if key is not a scalar, directly raise an error (the code below + # would convert to numpy arrays and raise later anyway) - GH29926 + raise InvalidIndexError(key) + def _should_fallback_to_positional(self) -> bool: """ If an integer key is not found, should we fall back to positional indexing? 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7aa1456846612..6e36029441f1b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2333,23 +2333,21 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): # -------------------------------------------------------------------- # Indexing Methods - def get_value(self, series, key): - # Label-based + def _check_indexing_error(self, key): if not is_hashable(key) or is_iterator(key): # We allow tuples if they are hashable, whereas other Index # subclasses require scalar. # We have to explicitly exclude generators, as these are hashable. raise InvalidIndexError(key) - try: - loc = self.get_loc(key) - except KeyError: - if is_integer(key): - loc = key - else: - raise - - return self._get_values_for_loc(series, loc, key) + def _should_fallback_to_positional(self) -> bool: + """ + If an integer key is not found, should we fall back to positional indexing? + """ + if not self.nlevels: + return False + # GH#33355 + return self.levels[0]._should_fallback_to_positional() def _get_values_for_loc(self, series: "Series", loc, key): """ diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 7e75b5324445e..54b22dbc53466 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -87,8 +87,8 @@ def test_series_getitem_returns_scalar( (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), - (lambda s: s.__getitem__(len(s)), IndexError, "is out of bounds"), - (lambda s: s[len(s)], IndexError, "is out of bounds"), + (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) + (lambda s: s[len(s)], KeyError, ""), # match should include len(s) ( lambda s: s.iloc[len(s)], 
IndexError, diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 9d181bdcb9491..ed11af8ef68ad 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, Float64Index, Int64Index, MultiIndex import pandas._testing as tm @@ -126,7 +126,32 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): # this works...for now df["A"].iloc[14] = 5 - assert df["A"][14] == 5 + assert df["A"].iloc[14] == 5 + + @pytest.mark.parametrize("dtype", [int, float]) + def test_getitem_intkey_leading_level( + self, multiindex_year_month_day_dataframe_random_data, dtype + ): + # GH#33355 don't fall back to positional when leading level is int + ymd = multiindex_year_month_day_dataframe_random_data + levels = ymd.index.levels + ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) + ser = ymd["A"] + mi = ser.index + assert isinstance(mi, MultiIndex) + if dtype is int: + assert isinstance(mi.levels[0], Int64Index) + else: + assert isinstance(mi.levels[0], Float64Index) + + assert 14 not in mi.levels[0] + assert not mi.levels[0]._should_fallback_to_positional() + assert not mi._should_fallback_to_positional() + + with pytest.raises(KeyError, match="14"): + ser[14] + with pytest.raises(KeyError, match="14"): + mi.get_value(ser, 14) # --------------------------------------------------------------------- # AMBIGUOUS CASES! 
@@ -140,7 +165,7 @@ def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_da tm.assert_series_equal(result, expected) # need to put in some work here - + # FIXME: don't leave commented-out # self.ymd.loc[2000, 0] = 0 # assert (self.ymd.loc[2000]['A'] == 0).all() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 1f19244cf76d3..853b92ea91274 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -236,6 +236,7 @@ def f(name, df2): f_index ) + # FIXME: don't leave commented-out # TODO(wesm): unused? # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T @@ -255,7 +256,11 @@ def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): assert notna(s.values[65:]).all() s[2000, 3, 10] = np.nan - assert isna(s[49]) + assert isna(s.iloc[49]) + + with pytest.raises(KeyError, match="49"): + # GH#33355 don't fall back to positional when leading level is int + s[49] def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data