From a5bf26a696d96c5ad3b38ae1d3f0cdf640e63a47 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 9 Dec 2023 00:02:52 +0100 Subject: [PATCH 1/3] CLN: Remove unnecessary copy keyword --- pandas/core/groupby/generic.py | 2 +- pandas/core/internals/managers.py | 25 +++++------------- pandas/tests/internals/test_internals.py | 32 ------------------------ 3 files changed, 7 insertions(+), 52 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8bef167b747e2..70379336e5061 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2019,7 +2019,7 @@ def _get_data_to_aggregate( mgr = obj._mgr if numeric_only: - mgr = mgr.get_numeric_data(copy=False) + mgr = mgr.get_numeric_data() return mgr def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b315a318a9a0a..2dd0a56c1e21a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -459,15 +459,10 @@ def _get_data_subset(self, predicate: Callable) -> Self: blocks = [blk for blk in self.blocks if predicate(blk.values)] return self._combine(blocks, copy=False) - def get_bool_data(self, copy: bool = False) -> Self: + def get_bool_data(self) -> Self: """ Select blocks that are bool-dtype and columns from object-dtype blocks that are all-bool. - - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks """ new_blocks = [] @@ -480,22 +475,14 @@ def get_bool_data(self, copy: bool = False) -> Self: nbs = blk._split() new_blocks.extend(nb for nb in nbs if nb.is_bool) - return self._combine(new_blocks, copy) + return self._combine(new_blocks, copy=False) - def get_numeric_data(self, copy: bool = False) -> Self: - """ - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks - """ + def get_numeric_data(self) -> Self: numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] if len(numeric_blocks) == len(self.blocks): # Avoid somewhat expensive _combine - if copy: - return self.copy(deep=True) return self - return self._combine(numeric_blocks, copy) + return self._combine(numeric_blocks, copy=False) def _combine( self, blocks: list[Block], copy: bool = True, index: Index | None = None @@ -1963,9 +1950,9 @@ def array_values(self): """The array that Series.array returns""" return self._block.array_values - def get_numeric_data(self, copy: bool = False) -> Self: + def get_numeric_data(self) -> Self: if self._block.is_numeric: - return self.copy(deep=copy) + return self.copy(deep=False) return self.make_empty() @property diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b7108896f01ed..591b9fdbd3e80 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -777,24 +777,6 @@ def test_get_numeric_data(self, using_copy_on_write): np.array([100.0, 200.0, 300.0]), ) - numeric2 = mgr.get_numeric_data(copy=True) - tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) - numeric2.iset( - numeric2.items.get_loc("float"), - np.array([1000.0, 2000.0, 3000.0]), - inplace=True, - ) - if using_copy_on_write: - tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([1.0, 1.0, 1.0]), - ) - else: - tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([100.0, 200.0, 300.0]), - ) - def test_get_bool_data(self, using_copy_on_write): mgr = create_mgr( "int: int; float: float; complex: complex;" @@ -822,20 +804,6 @@ def test_get_bool_data(self, using_copy_on_write): np.array([True, False, True]), ) - # Check sharing - bools2 = mgr.get_bool_data(copy=True) - bools2.iset(0, np.array([False, True, False])) - if using_copy_on_write: - tm.assert_numpy_array_equal( - mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, True, True]), - ) - else: - tm.assert_numpy_array_equal( - mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, False, True]), - ) - def test_unicode_repr_doesnt_raise(self): repr(create_mgr("b,\u05d0: object")) From 2001d4f6bc4c68b5888988120a1d654f655af5fc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 9 Dec 2023 00:07:43 +0100 Subject: [PATCH 2/3] CLN: Remove unnecessary copy keyword --- pandas/_testing/asserters.py | 4 ++-- pandas/core/frame.py | 4 ++-- pandas/core/internals/managers.py | 23 ++++++------------- .../frame/constructors/test_from_records.py | 4 ++-- .../frame/methods/test_to_dict_of_blocks.py | 18 +-------------- 5 files changed, 14 insertions(+), 39 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 0591394f5d9ed..c7fff3d44eeca 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1185,8 +1185,8 @@ def assert_frame_equal( # compare by blocks if by_blocks: - rblocks = right._to_dict_of_blocks(copy=False) - lblocks = left._to_dict_of_blocks(copy=False) + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3a4e8fdd91362..91d659b7ec1e9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12234,7 +12234,7 @@ def isin_(x): # ---------------------------------------------------------------------- # Internal Interface Methods - def _to_dict_of_blocks(self, copy: bool = True): + def _to_dict_of_blocks(self): """ Return a dict of dtype -> Constructor Types that each is a homogeneous dtype. @@ -12247,7 +12247,7 @@ def _to_dict_of_blocks(self, copy: bool = True): mgr = cast(BlockManager, mgr) return { k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self) - for k, v, in mgr.to_dict(copy=copy).items() + for k, v, in mgr.to_dict().items() } @property diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2dd0a56c1e21a..d09c7b1537152 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -457,7 +457,7 @@ def is_view(self) -> bool: def _get_data_subset(self, predicate: Callable) -> Self: blocks = [blk for blk in self.blocks if predicate(blk.values)] - return self._combine(blocks, copy=False) + return self._combine(blocks) def get_bool_data(self) -> Self: """ @@ -475,18 +475,16 @@ def get_bool_data(self) -> Self: nbs = blk._split() new_blocks.extend(nb for nb in nbs if nb.is_bool) - return self._combine(new_blocks, copy=False) + return self._combine(new_blocks) def get_numeric_data(self) -> Self: numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] if len(numeric_blocks) == len(self.blocks): # Avoid somewhat expensive _combine return self - return self._combine(numeric_blocks, copy=False) + return self._combine(numeric_blocks) - def _combine( - self, blocks: list[Block], copy: bool = True, index: Index | None = None - ) -> Self: + def _combine(self, blocks: list[Block], index: Index | None = None) -> Self: """return a new manager with the blocks""" if len(blocks) == 0: if self.ndim == 2: @@ -503,11 +501,8 @@ def _combine( inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) new_blocks: list[Block] = [] - # TODO(CoW) we could optimize here if we know that the passed blocks - # are fully "owned" (eg created from an operation, not coming from - # an existing manager) for b in blocks: - nb = b.copy(deep=copy) + nb = b.copy(deep=False) nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer]) new_blocks.append(nb) @@ -1558,14 +1553,10 @@ def unstack(self, unstacker, fill_value) -> BlockManager: bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) return bm - def to_dict(self, copy: bool = True) -> dict[str, Self]: + def to_dict(self) -> dict[str, Self]: """ Return a dict of str(dtype) -> BlockManager - Parameters - ---------- - copy : bool, default True - Returns ------- values : a dict of dtype -> BlockManager @@ -1576,7 +1567,7 @@ def to_dict(self, copy: bool = True) -> dict[str, Self]: bd.setdefault(str(b.dtype), []).append(b) # TODO(EA2D): the combine will be unnecessary with 2D EAs - return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} + return {dtype: self._combine(blocks) for dtype, blocks in bd.items()} def as_array( self, diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 59dca5055f170..53cc8810e3f05 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -80,7 +80,7 @@ def test_from_records_sequencelike(self): # this is actually tricky to create the recordlike arrays and # have the dtypes be intact - blocks = df._to_dict_of_blocks(copy=False) + blocks = df._to_dict_of_blocks() tuples = [] columns = [] dtypes = [] @@ -169,7 +169,7 @@ def test_from_records_dictlike(self): # columns is in a different order here than the actual items iterated # from the dict - blocks = df._to_dict_of_blocks(copy=False) + blocks = df._to_dict_of_blocks() columns = [] for b in blocks.values(): columns.extend(b.columns) diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 906e74230a762..aecedfd10a6cd 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -14,22 +14,6 @@ class TestToDictOfBlocks: - def test_copy_blocks(self, float_frame): - # GH#9607 - df = DataFrame(float_frame, copy=True) - column = df.columns[0] - - # use the default copy=True, change a column - _last_df = None - blocks = df._to_dict_of_blocks(copy=True) - for _df in blocks.values(): - _last_df = _df - if column in _df: - _df.loc[:, column] = _df[column] + 1 - - # make sure we did not change the original DataFrame - assert _last_df is not None and not _last_df[column].equals(df[column]) - def test_no_copy_blocks(self, float_frame, using_copy_on_write): # GH#9607 df = DataFrame(float_frame, copy=True) @@ -37,7 +21,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write): _last_df = None # use the copy=False, change a column - blocks = df._to_dict_of_blocks(copy=False) + blocks = df._to_dict_of_blocks() for _df in blocks.values(): _last_df = _df if column in _df: From 8a06aefc2d07bade0a597172fad453254d83f515 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 9 Dec 2023 00:35:21 +0100 Subject: [PATCH 3/3] Fixup --- .../frame/methods/test_to_dict_of_blocks.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 1d2e31d24a9b7..f7d9dc914a2ee 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -14,22 +14,6 @@ class TestToDictOfBlocks: - def test_copy_blocks(self, float_frame): - # GH#9607 - df = DataFrame(float_frame, copy=True) - column = df.columns[0] - - # use the default copy=True, change a column - _last_df = None - blocks = df._to_dict_of_blocks(copy=True) - for _df in blocks.values(): - _last_df = _df - if column in _df: - _df.loc[:, column] = _df[column] + 1 - - # make sure we did not change the original DataFrame - assert _last_df is not None and not _last_df[column].equals(df[column]) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_no_copy_blocks(self, float_frame, using_copy_on_write): # GH#9607