From 819413f268de6c423f72e74954fd3cc419230041 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 28 Nov 2023 00:27:27 +0100 Subject: [PATCH 1/5] CoW: Add warning for slicing a Series with a MultiIndex --- pandas/core/series.py | 2 +- pandas/tests/copy_view/test_indexing.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3f7ac75e623a2..407eab82ff248 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1213,7 +1213,7 @@ def _get_value(self, label, takeable: bool = False): new_ser = self._constructor( new_values, index=new_index, name=self.name, copy=False ) - if using_copy_on_write() and isinstance(loc, slice): + if isinstance(loc, slice): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 72b7aea3709c0..2e269c97c4fd3 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1150,13 +1150,12 @@ def test_set_value_copy_only_necessary_column( assert np.shares_memory(get_array(df, "a"), get_array(view, "a")) -def test_series_midx_slice(using_copy_on_write): +def test_series_midx_slice(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])) result = ser[1] assert np.shares_memory(get_array(ser), get_array(result)) - # TODO(CoW-warn) should warn -> reference is only tracked in CoW mode, so - # warning is not triggered - result.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[0] = 100 if using_copy_on_write: expected = Series( [1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]]) From 44c2d1e92b8378cf665f9f3d89e57fedde1bc6e3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 28 Nov 2023 00:33:50 +0100 Subject: [PATCH 2/5] Update --- pandas/core/series.py | 2 +- pandas/tests/copy_view/test_indexing.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 407eab82ff248..94a4b53cea81a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1171,7 +1171,7 @@ def _get_values_tuple(self, key: tuple): # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) new_ser = self._constructor(self._values[indexer], index=new_index, copy=False) - if using_copy_on_write() and isinstance(indexer, slice): + if isinstance(indexer, slice): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 2e269c97c4fd3..14109ee584e82 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1189,16 +1189,15 @@ def test_getitem_midx_slice( assert df.iloc[0, 0] == 100 -def test_series_midx_tuples_slice(using_copy_on_write): +def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write): ser = Series( [1, 2, 3], index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), ) result = ser[(1, 2)] assert np.shares_memory(get_array(ser), get_array(result)) - # TODO(CoW-warn) should warn -> reference is only tracked in CoW mode, so - # warning is not triggered - result.iloc[0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[0] = 100 if using_copy_on_write: expected = Series( [1, 2, 3], From 02a2479881461d18b36bac8afe371bb7cded688c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 28 Nov 2023 00:58:43 +0100 Subject: [PATCH 3/5] CoW: Avoid warning for ArrowDtypes when setting inplace --- pandas/core/internals/managers.py | 20 ++++++++++++++------ pandas/tests/copy_view/test_interp_fillna.py | 5 +++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 843441e4865c7..8fce2be8e4e44 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -54,7 +54,11 @@ ) import pandas.core.algorithms as algos -from pandas.core.arrays import DatetimeArray +from pandas.core.arrays import ( + ArrowExtensionArray, + ArrowStringArray, + DatetimeArray, +) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -1343,11 +1347,15 @@ def column_setitem( intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ if warn_copy_on_write() and not self._has_no_reference(loc): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) + if not isinstance( + self.blocks[self.blknos[loc]].values, + (ArrowExtensionArray, ArrowStringArray), + ): + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) elif using_copy_on_write() and not self._has_no_reference(loc): blkno = self.blknos[loc] # Split blocks to only copy the column we want to modify diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index cdb06de90a568..c2482645b072e 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -344,8 +344,9 @@ def test_fillna_inplace_ea_noop_shares_memory( assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) - # TODO(CoW-warn) should this warn for ArrowDtype? - with tm.assert_cow_warning(warn_copy_on_write): + with tm.assert_cow_warning( + warn_copy_on_write and "pyarrow" not in any_numeric_ea_and_arrow_dtype + ): df.iloc[0, 1] = 100 if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: tm.assert_frame_equal(df_orig, view) From 18460710b75daa1ca613c8af64967753ede91a5c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 28 Nov 2023 00:59:36 +0100 Subject: [PATCH 4/5] Revert "CoW: Avoid warning for ArrowDtypes when setting inplace" This reverts commit 02a2479881461d18b36bac8afe371bb7cded688c. --- pandas/core/internals/managers.py | 20 ++++++-------------- pandas/tests/copy_view/test_interp_fillna.py | 5 ++--- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8fce2be8e4e44..843441e4865c7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -54,11 +54,7 @@ ) import pandas.core.algorithms as algos -from pandas.core.arrays import ( - ArrowExtensionArray, - ArrowStringArray, - DatetimeArray, -) +from pandas.core.arrays import DatetimeArray from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -1347,15 +1343,11 @@ def column_setitem( intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ if warn_copy_on_write() and not self._has_no_reference(loc): - if not isinstance( - self.blocks[self.blknos[loc]].values, - (ArrowExtensionArray, ArrowStringArray), - ): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) elif using_copy_on_write() and not self._has_no_reference(loc): blkno = self.blknos[loc] # Split blocks to only copy the column we want to modify diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index c2482645b072e..cdb06de90a568 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -344,9 +344,8 @@ def test_fillna_inplace_ea_noop_shares_memory( assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) - with tm.assert_cow_warning( - warn_copy_on_write and "pyarrow" not in any_numeric_ea_and_arrow_dtype - ): + # TODO(CoW-warn) should this warn for ArrowDtype? + with tm.assert_cow_warning(warn_copy_on_write): df.iloc[0, 1] = 100 if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: tm.assert_frame_equal(df_orig, view) From 964872f9d31d55ff5c39c43b1a5543d5bd7913c5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Dec 2023 12:14:12 +0100 Subject: [PATCH 5/5] update test --- pandas/tests/copy_view/test_indexing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 14109ee584e82..1f9125249d094 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1152,13 +1152,16 @@ def test_set_value_copy_only_necessary_column( def test_series_midx_slice(using_copy_on_write, warn_copy_on_write): ser = Series([1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])) + ser_orig = ser.copy() result = ser[1] assert np.shares_memory(get_array(ser), get_array(result)) with tm.assert_cow_warning(warn_copy_on_write): result.iloc[0] = 100 if using_copy_on_write: + tm.assert_series_equal(ser, ser_orig) + else: expected = Series( - [1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]]) + [100, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]]) ) tm.assert_series_equal(ser, expected)