diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fec8639f5a44d..987535072a2bf 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -11,6 +11,7 @@ import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare +from pandas._typing import ArrayLike from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -340,11 +341,12 @@ def iget(self, i): def set(self, locs, values): """ - Modify Block in-place with new item value + Modify block values in-place with new item value. - Returns - ------- - None + Notes + ----- + `set` never creates a new array or new Block, whereas `setitem` _may_ + create a new array and always creates a new Block. """ self.values[locs] = values @@ -793,7 +795,7 @@ def _replace_single(self, *args, **kwargs): def setitem(self, indexer, value): """ - Set the value inplace, returning a a maybe different typed block. + Attempt self.values[indexer] = value, possibly creating a new array. Parameters ---------- @@ -1635,12 +1637,15 @@ def iget(self, col): raise IndexError(f"{self} only contains one item") return self.values - def should_store(self, value): + def should_store(self, value: ArrayLike) -> bool: + """ + Can we set the given array-like value inplace? + """ return isinstance(value, self._holder) - def set(self, locs, values, check=False): + def set(self, locs, values): assert locs.tolist() == [0] - self.values = values + self.values[:] = values def putmask( self, mask, new, align=True, inplace=False, axis=0, transpose=False, @@ -1751,7 +1756,7 @@ def is_numeric(self): def setitem(self, indexer, value): """ - Set the value inplace, returning a same-typed block. + Attempt self.values[indexer] = value, possibly creating a new array. This differs from Block.setitem by not allowing setitem to change the dtype of the Block. @@ -2057,7 +2062,7 @@ def to_native_types( ) return formatter.get_result_as_array() - def should_store(self, value) -> bool: + def should_store(self, value: ArrayLike) -> bool: # when inserting a column should not coerce integers to floats # unnecessarily return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype @@ -2075,7 +2080,7 @@ def _can_hold_element(self, element: Any) -> bool: element, (float, int, complex, np.float_, np.int_) ) and not isinstance(element, (bool, np.bool_)) - def should_store(self, value) -> bool: + def should_store(self, value: ArrayLike) -> bool: return issubclass(value.dtype.type, np.complexfloating) @@ -2094,7 +2099,7 @@ def _can_hold_element(self, element: Any) -> bool: ) return is_integer(element) - def should_store(self, value) -> bool: + def should_store(self, value: ArrayLike) -> bool: return is_integer_dtype(value) and value.dtype == self.dtype @@ -2105,6 +2110,9 @@ class DatetimeLikeBlockMixin: def _holder(self): return DatetimeArray + def should_store(self, value): + return is_dtype_equal(self.dtype, value.dtype) + @property def fill_value(self): return np.datetime64("NaT", "ns") @@ -2241,16 +2249,9 @@ def to_native_types( ).reshape(i8values.shape) return np.atleast_2d(result) - def should_store(self, value) -> bool: - return is_datetime64_dtype(value.dtype) - def set(self, locs, values): """ - Modify Block in-place with new item value - - Returns - ------- - None + See Block.set.__doc__ """ values = conversion.ensure_datetime64ns(values, copy=False) @@ -2274,6 +2275,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): _can_hold_element = DatetimeBlock._can_hold_element to_native_types = DatetimeBlock.to_native_types fill_value = np.datetime64("NaT", "ns") + should_store = DatetimeBlock.should_store @property def _holder(self): @@ -2483,9 +2485,6 @@ def fillna(self, value, **kwargs): ) return super().fillna(value, **kwargs) - def should_store(self, value) -> bool: - return is_timedelta64_dtype(value.dtype) - def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.values @@ -2527,7 +2526,7 @@ def _can_hold_element(self, element: Any) -> bool: return issubclass(tipo.type, np.bool_) return isinstance(element, (bool, np.bool_)) - def should_store(self, value) -> bool: + def should_store(self, value: ArrayLike) -> bool: return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype( value ) @@ -2619,7 +2618,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] def _can_hold_element(self, element: Any) -> bool: return True - def should_store(self, value) -> bool: + def should_store(self, value: ArrayLike) -> bool: return not ( issubclass( value.dtype.type, @@ -2868,6 +2867,9 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return Categorical + def should_store(self, arr: ArrayLike): + return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype) + def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ values = self.values diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f6b9e9a44ba14..9664f8d7212ad 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -694,6 +694,17 @@ def test_series_indexing_zerodim_np_array(self): result = s.iloc[np.array(0)] assert result == 1 + def test_iloc_setitem_categorical_updates_inplace(self): + # Mixed dtype ensures we go through take_split_path in setitem_with_indexer + cat = pd.Categorical(["A", "B", "C"]) + df = pd.DataFrame({1: cat, 2: [1, 2, 3]}) + + # This should modify our original values in-place + df.iloc[:, 0] = cat[::-1] + + expected = pd.Categorical(["C", "B", "A"]) + tm.assert_categorical_equal(cat, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index deffeb0a1800c..bbf968aef4a5c 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1189,6 +1189,23 @@ def test_binop_other(self, op, value, dtype): tm.assert_series_equal(result, expected) +class TestShouldStore: + def test_should_store_categorical(self): + cat = pd.Categorical(["A", "B", "C"]) + df = pd.DataFrame(cat) + blk = df._data.blocks[0] + + # matching dtype + assert blk.should_store(cat) + assert blk.should_store(cat[:-1]) + + # different dtype + assert not blk.should_store(cat.as_ordered()) + + # ndarray instead of Categorical + assert not blk.should_store(np.asarray(cat)) + + @pytest.mark.parametrize( "typestr, holder", [