From ac79b3b555ddebd6299f6d7080f362b085fbed92 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 24 Dec 2021 17:11:28 +0100 Subject: [PATCH 1/2] DEPR: Deprecate set and dict as indexers --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/frame.py | 25 ++++++++ pandas/core/indexing.py | 33 ++++++++++ pandas/core/series.py | 26 ++++++++ pandas/tests/frame/indexing/test_getitem.py | 20 +++++- pandas/tests/frame/indexing/test_indexing.py | 62 +++++++++++++++++++ pandas/tests/indexing/multiindex/test_loc.py | 7 ++- pandas/tests/series/indexing/test_getitem.py | 16 +++++ pandas/tests/series/indexing/test_indexing.py | 31 ++++++++++ 9 files changed, 217 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index febf08f2c47aa..1fde030d4b7a8 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -545,6 +545,7 @@ Other Deprecations - Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`) - A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) +- Deprecated passing ``set`` or ``dict`` as indexer for :meth:`DataFrame.loc.__setitem__`, :meth:`DataFrame.loc.__getitem__`, :meth:`Series.loc.__setitem__`, :meth:`Series.loc.__getitem__`, :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__` and :meth:`Series.__setitem__` (:issue:`42825`) - Deprecated :meth:`Index.__getitem__` with a bool key; use ``index.values[key]`` to get the old behavior (:issue:`44051`) - Deprecated downcasting column-by-column in :meth:`DataFrame.where` with integer-dtypes (:issue:`44597`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 03c9addefecc0..f69d7d2f463c1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3456,7 +3456,32 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]: for i in range(len(self.columns)): yield self._get_column_array(i) + def _check_deprecated_indexers(self, key): + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + warnings.warn( + "Passing a set as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + def __getitem__(self, key): + self._check_deprecated_indexers(key) key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f043a8cee308c..dc7e0973b545c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -641,6 +641,10 @@ def _get_setitem_indexer(self, key): if self.name == "loc": self._ensure_listlike_indexer(key) + if isinstance(key, tuple): + for x in key: + self._check_deprecated_indexers(x) + if self.axis is not None: return self._convert_tuple(key) @@ -698,6 +702,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): ) def __setitem__(self, key, value): + self._check_deprecated_indexers(key) if isinstance(key, tuple): key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -890,6 +895,9 @@ def _getitem_nested_tuple(self, tup: tuple): # we have a nested tuple so have at least 1 multi-index level # we should be able to match up the dimensionality here + for key in tup: + self._check_deprecated_indexers(key) + # we have too many indexers for our dim, but have at least 1 # multi-index dimension, try to see if we have something like # a tuple passed to a series with a multi-index @@ -942,7 +950,32 @@ def _getitem_nested_tuple(self, tup: tuple): def _convert_to_indexer(self, key, axis: int): raise AbstractMethodError(self) + def _check_deprecated_indexers(self, key): + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + warnings.warn( + "Passing a set as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + def __getitem__(self, key): + self._check_deprecated_indexers(key) if type(key) is tuple: key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) diff --git a/pandas/core/series.py b/pandas/core/series.py index 15805c0aa94ed..8e21549a3ea1d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -938,7 +938,32 @@ def _slice(self, slobj: slice, axis: int = 0) -> Series: # _slice is *always* positional return self._get_values(slobj) + def _check_deprecated_indexers(self, key): + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + warnings.warn( + "Passing a set as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + def __getitem__(self, key): + self._check_deprecated_indexers(key) key = com.apply_if_callable(key, self) if key is Ellipsis: @@ -1065,6 +1090,7 @@ def _get_value(self, label, takeable: bool = False): return self.index._get_values_for_loc(self, loc, label) def __setitem__(self, key, value) -> None: + self._check_deprecated_indexers(key) key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible() diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 3028a433f2dae..0d4ab84175aab 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -134,7 +134,11 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): idx = idx_type(keys) idx_check = list(idx_type(keys)) - result = frame[idx] + if isinstance(idx, (set, dict)): + with tm.assert_produces_warning(FutureWarning): + result = frame[idx] + else: + result = frame[idx] expected = frame.loc[:, idx_check] expected.columns.names = frame.columns.names @@ -143,7 +147,8 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): idx = idx_type(keys + [missing]) with pytest.raises(KeyError, match="not in index"): - frame[idx] + with tm.assert_produces_warning(FutureWarning): + frame[idx] def test_getitem_iloc_generator(self): # GH#39614 @@ -388,3 +393,14 @@ def test_getitem_datetime_slice(self): ), ) tm.assert_frame_equal(result, expected) + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)]) + ) + with tm.assert_produces_warning(FutureWarning): + df[key] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 2194fb4d5b1bd..bee8ccb125315 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1526,3 +1526,65 @@ def test_loc_iloc_setitem_non_categorical_rhs( # "c" not part of the categories with pytest.raises(TypeError, match=msg1): indexer(df)[key] = ["c", "c"] + + +class TestDepreactedIndexers: + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] + + @pytest.mark.parametrize( + "key", [{1}, {1: 1}, ({1}, "a"), ({1: 1}, "a"), (1, {"a"}), (1, {"a": "a"})] + ) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 + + @pytest.mark.parametrize( + "key", + [ + {1}, + {1: 1}, + (({1}, 2), "a"), + (({1: 1}, 2), "a"), + ((1, 2), {"a"}), + ((1, 2), {"a": "a"}), + ], + ) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + df = DataFrame( + [[1, 2], [3, 4]], + columns=["a", "b"], + index=MultiIndex.from_tuples([(1, 2), (3, 4)]), + ) + with tm.assert_produces_warning(FutureWarning): + df.loc[key] = 1 diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 1756cc3ae707c..6e59311634c76 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -339,8 +339,11 @@ def convert_nested_indexer(indexer_type, keys): convert_nested_indexer(indexer_type, k) for indexer_type, k in zip(types, keys) ) - - result = df.loc[indexer, "Data"] + if indexer_type_1 is set or indexer_type_2 is set: + with tm.assert_produces_warning(FutureWarning): + result = df.loc[indexer, "Data"] + else: + result = df.loc[indexer, "Data"] expected = Series( [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys) ) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index dabe3d480ff19..0da376ccac450 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -696,3 +696,19 @@ def test_duplicated_index_getitem_positional_indexer(index_vals): s = Series(range(5), index=list(index_vals)) result = s[3] assert result == 3 + + +class TestGetitemDeprecatedIndexers: + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + ser[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + ser[key] = 1 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 31c21e123a0de..ff0a4ae1b5564 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -8,6 +8,7 @@ from pandas import ( DataFrame, IndexSlice, + MultiIndex, Series, Timedelta, Timestamp, @@ -316,3 +317,33 @@ def test_frozenset_index(): assert s[idx1] == 2 s[idx1] = 3 assert s[idx1] == 3 + + +class TestDepreactedIndexers: + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_getitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2]) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) + def test_getitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] + + @pytest.mark.parametrize("key", [{1}, {1: 1}]) + def test_setitem_dict_and_set_deprecated(self, key): + # GH#42825 + ser = Series([1, 2]) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] = 1 + + @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)]) + def test_setitem_dict_and_set_deprecated_multiindex(self, key): + # GH#42825 + ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) + with tm.assert_produces_warning(FutureWarning): + ser.loc[key] = 1 From f04ec539eaf4ab1d03c1654e6f90b32c6b56f078 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 28 Dec 2021 15:13:26 +0100 Subject: [PATCH 2/2] Refactor --- pandas/core/frame.py | 27 ++----------------- pandas/core/indexing.py | 58 +++++++++++++++++++++-------------------- pandas/core/series.py | 33 +++++------------------ 3 files changed, 38 insertions(+), 80 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f69d7d2f463c1..fd5163bcb14ed 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -170,6 +170,7 @@ ) from pandas.core.indexing import ( check_bool_indexer, + check_deprecated_indexers, convert_to_index_sliceable, ) from pandas.core.internals import ( @@ -3456,32 +3457,8 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]: for i in range(len(self.columns)): yield self._get_column_array(i) - def _check_deprecated_indexers(self, key): - if ( - isinstance(key, set) - or isinstance(key, tuple) - and any(isinstance(x, set) for x in key) - ): - warnings.warn( - "Passing a set as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if ( - isinstance(key, dict) - or isinstance(key, tuple) - and any(isinstance(x, dict) for x in key) - ): - warnings.warn( - "Passing a dict as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - def __getitem__(self, key): - self._check_deprecated_indexers(key) + check_deprecated_indexers(key) key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index dc7e0973b545c..19fbc43fea3b3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -643,7 +643,7 @@ def _get_setitem_indexer(self, key): if isinstance(key, tuple): for x in key: - self._check_deprecated_indexers(x) + check_deprecated_indexers(x) if self.axis is not None: return self._convert_tuple(key) @@ -702,7 +702,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): ) def __setitem__(self, key, value): - self._check_deprecated_indexers(key) + check_deprecated_indexers(key) if isinstance(key, tuple): key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -896,7 +896,7 @@ def _getitem_nested_tuple(self, tup: tuple): # we should be able to match up the dimensionality here for key in tup: - self._check_deprecated_indexers(key) + check_deprecated_indexers(key) # we have too many indexers for our dim, but have at least 1 # multi-index dimension, try to see if we have something like @@ -950,32 +950,8 @@ def _getitem_nested_tuple(self, tup: tuple): def _convert_to_indexer(self, key, axis: int): raise AbstractMethodError(self) - def _check_deprecated_indexers(self, key): - if ( - isinstance(key, set) - or isinstance(key, tuple) - and any(isinstance(x, set) for x in key) - ): - warnings.warn( - "Passing a set as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if ( - isinstance(key, dict) - or isinstance(key, tuple) - and any(isinstance(x, dict) for x in key) - ): - warnings.warn( - "Passing a dict as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - def __getitem__(self, key): - self._check_deprecated_indexers(key) + check_deprecated_indexers(key) if type(key) is tuple: key = tuple(list(x) if is_iterator(x) else x for x in key) key = tuple(com.apply_if_callable(x, self.obj) for x in key) @@ -2477,3 +2453,29 @@ def need_slice(obj: slice) -> bool: or obj.stop is not None or (obj.step is not None and obj.step != 1) ) + + +def check_deprecated_indexers(key) -> None: + """Checks if the key is a deprecated indexer.""" + if ( + isinstance(key, set) + or isinstance(key, tuple) + and any(isinstance(x, set) for x in key) + ): + warnings.warn( + "Passing a set as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + isinstance(key, dict) + or isinstance(key, tuple) + and any(isinstance(x, dict) for x in key) + ): + warnings.warn( + "Passing a dict as an indexer is deprecated and will raise in " + "a future version. Use a list instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8e21549a3ea1d..2c747b20e0a0a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -124,7 +124,10 @@ ensure_index, ) import pandas.core.indexes.base as ibase -from pandas.core.indexing import check_bool_indexer +from pandas.core.indexing import ( + check_bool_indexer, + check_deprecated_indexers, +) from pandas.core.internals import ( SingleArrayManager, SingleBlockManager, @@ -938,32 +941,8 @@ def _slice(self, slobj: slice, axis: int = 0) -> Series: # _slice is *always* positional return self._get_values(slobj) - def _check_deprecated_indexers(self, key): - if ( - isinstance(key, set) - or isinstance(key, tuple) - and any(isinstance(x, set) for x in key) - ): - warnings.warn( - "Passing a set as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if ( - isinstance(key, dict) - or isinstance(key, tuple) - and any(isinstance(x, dict) for x in key) - ): - warnings.warn( - "Passing a dict as an indexer is deprecated and will raise in " - "a future version. Use a list instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - def __getitem__(self, key): - self._check_deprecated_indexers(key) + check_deprecated_indexers(key) key = com.apply_if_callable(key, self) if key is Ellipsis: @@ -1090,7 +1069,7 @@ def _get_value(self, label, takeable: bool = False): return self.index._get_values_for_loc(self, loc, label) def __setitem__(self, key, value) -> None: - self._check_deprecated_indexers(key) + check_deprecated_indexers(key) key = com.apply_if_callable(key, self) cacher_needs_updating = self._check_is_chained_assignment_possible()