Skip to content

CLN: Remove unnecessary copy keyword #56420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1206,8 +1206,8 @@ def assert_frame_equal(

# compare by blocks
if by_blocks:
rblocks = right._to_dict_of_blocks(copy=False)
lblocks = left._to_dict_of_blocks(copy=False)
rblocks = right._to_dict_of_blocks()
lblocks = left._to_dict_of_blocks()
for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))):
assert dtype in lblocks
assert dtype in rblocks
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -12484,7 +12484,7 @@ def isin_(x):
# ----------------------------------------------------------------------
# Internal Interface Methods

def _to_dict_of_blocks(self, copy: bool = True):
def _to_dict_of_blocks(self):
"""
Return a dict of dtype -> Constructor Types,
each of which has a homogeneous dtype.
Expand All @@ -12496,7 +12496,7 @@ def _to_dict_of_blocks(self, copy: bool = True):
mgr = cast(BlockManager, mgr_to_mgr(mgr, "block"))
return {
k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self)
for k, v, in mgr.to_dict(copy=copy).items()
for k, v, in mgr.to_dict().items()
}

@property
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2012,7 +2012,7 @@ def _get_data_to_aggregate(
mgr = obj._mgr

if numeric_only:
mgr = mgr.get_numeric_data(copy=False)
mgr = mgr.get_numeric_data()
return mgr

def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
Expand Down
44 changes: 11 additions & 33 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,17 +495,12 @@ def is_view(self) -> bool:

def _get_data_subset(self, predicate: Callable) -> Self:
blocks = [blk for blk in self.blocks if predicate(blk.values)]
return self._combine(blocks, copy=False)
return self._combine(blocks)

def get_bool_data(self, copy: bool = False) -> Self:
def get_bool_data(self) -> Self:
"""
Select blocks that are bool-dtype and columns from object-dtype blocks
that are all-bool.

Parameters
----------
copy : bool, default False
Whether to copy the blocks
"""

new_blocks = []
Expand All @@ -518,26 +513,16 @@ def get_bool_data(self, copy: bool = False) -> Self:
nbs = blk._split()
new_blocks.extend(nb for nb in nbs if nb.is_bool)

return self._combine(new_blocks, copy)
return self._combine(new_blocks)

def get_numeric_data(self, copy: bool = False) -> Self:
"""
Parameters
----------
copy : bool, default False
Whether to copy the blocks
"""
def get_numeric_data(self) -> Self:
numeric_blocks = [blk for blk in self.blocks if blk.is_numeric]
if len(numeric_blocks) == len(self.blocks):
# Avoid somewhat expensive _combine
if copy:
return self.copy(deep=True)
return self
return self._combine(numeric_blocks, copy)
return self._combine(numeric_blocks)

def _combine(
self, blocks: list[Block], copy: bool = True, index: Index | None = None
) -> Self:
def _combine(self, blocks: list[Block], index: Index | None = None) -> Self:
"""return a new manager with the blocks"""
if len(blocks) == 0:
if self.ndim == 2:
Expand All @@ -554,11 +539,8 @@ def _combine(
inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

new_blocks: list[Block] = []
# TODO(CoW) we could optimize here if we know that the passed blocks
# are fully "owned" (eg created from an operation, not coming from
# an existing manager)
for b in blocks:
nb = b.copy(deep=copy)
nb = b.copy(deep=False)
nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
new_blocks.append(nb)

Expand Down Expand Up @@ -1630,14 +1612,10 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
return bm

def to_dict(self, copy: bool = True) -> dict[str, Self]:
def to_dict(self) -> dict[str, Self]:
"""
Return a dict of str(dtype) -> BlockManager

Parameters
----------
copy : bool, default True

Returns
-------
values : a dict of dtype -> BlockManager
Expand All @@ -1648,7 +1626,7 @@ def to_dict(self, copy: bool = True) -> dict[str, Self]:
bd.setdefault(str(b.dtype), []).append(b)

# TODO(EA2D): the combine will be unnecessary with 2D EAs
return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()}
return {dtype: self._combine(blocks) for dtype, blocks in bd.items()}

def as_array(
self,
Expand Down Expand Up @@ -2028,9 +2006,9 @@ def array_values(self) -> ExtensionArray:
"""The array that Series.array returns"""
return self._block.array_values

def get_numeric_data(self, copy: bool = False) -> Self:
def get_numeric_data(self) -> Self:
if self._block.is_numeric:
return self.copy(deep=copy)
return self.copy(deep=False)
return self.make_empty()

@property
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_from_records_sequencelike(self):

# it is actually tricky to create the record-like arrays and
# keep the dtypes intact
blocks = df._to_dict_of_blocks(copy=False)
blocks = df._to_dict_of_blocks()
tuples = []
columns = []
dtypes = []
Expand Down Expand Up @@ -169,7 +169,7 @@ def test_from_records_dictlike(self):

# columns is in a different order here than the actual items iterated
# from the dict
blocks = df._to_dict_of_blocks(copy=False)
blocks = df._to_dict_of_blocks()
columns = []
for b in blocks.values():
columns.extend(b.columns)
Expand Down
18 changes: 1 addition & 17 deletions pandas/tests/frame/methods/test_to_dict_of_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,6 @@


class TestToDictOfBlocks:
def test_copy_blocks(self, float_frame):
# GH#9607
df = DataFrame(float_frame, copy=True)
column = df.columns[0]

# use the default copy=True, change a column
_last_df = None
blocks = df._to_dict_of_blocks(copy=True)
for _df in blocks.values():
_last_df = _df
if column in _df:
_df.loc[:, column] = _df[column] + 1

# make sure we did not change the original DataFrame
assert _last_df is not None and not _last_df[column].equals(df[column])

@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_no_copy_blocks(self, float_frame, using_copy_on_write):
# GH#9607
Expand All @@ -38,7 +22,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write):

_last_df = None
# the returned blocks are not deep-copied; change a column
blocks = df._to_dict_of_blocks(copy=False)
blocks = df._to_dict_of_blocks()
for _df in blocks.values():
_last_df = _df
if column in _df:
Expand Down
32 changes: 0 additions & 32 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,24 +790,6 @@ def test_get_numeric_data(self, using_copy_on_write):
np.array([100.0, 200.0, 300.0]),
)

numeric2 = mgr.get_numeric_data(copy=True)
tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"]))
numeric2.iset(
numeric2.items.get_loc("float"),
np.array([1000.0, 2000.0, 3000.0]),
inplace=True,
)
if using_copy_on_write:
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([1.0, 1.0, 1.0]),
)
else:
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("float")).internal_values(),
np.array([100.0, 200.0, 300.0]),
)

def test_get_bool_data(self, using_copy_on_write):
mgr = create_mgr(
"int: int; float: float; complex: complex;"
Expand Down Expand Up @@ -835,20 +817,6 @@ def test_get_bool_data(self, using_copy_on_write):
np.array([True, False, True]),
)

# Check sharing
bools2 = mgr.get_bool_data(copy=True)
bools2.iset(0, np.array([False, True, False]))
if using_copy_on_write:
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, True, True]),
)
else:
tm.assert_numpy_array_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
np.array([True, False, True]),
)

def test_unicode_repr_doesnt_raise(self):
repr(create_mgr("b,\u05d0: object"))

Expand Down