Skip to content

DEPR: downcast kwd in fillna #45105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5207,7 +5207,7 @@ def fillna(
axis: Axis | None = None,
inplace: bool = False,
limit=None,
downcast=None,
downcast=lib.no_default,
) -> DataFrame | None:
return super().fillna(
value=value,
Expand Down Expand Up @@ -10862,7 +10862,7 @@ def ffill(
axis: None | Axis = None,
inplace: bool = False,
limit: None | int = None,
downcast=None,
downcast=lib.no_default,
) -> DataFrame | None:
return super().ffill(axis, inplace, limit, downcast)

Expand All @@ -10872,7 +10872,7 @@ def bfill(
axis: None | Axis = None,
inplace: bool = False,
limit: None | int = None,
downcast=None,
downcast=lib.no_default,
) -> DataFrame | None:
return super().bfill(axis, inplace, limit, downcast)

Expand Down
26 changes: 22 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6258,7 +6258,7 @@ def fillna(
axis=None,
inplace: bool_t = False,
limit=None,
downcast=None,
downcast=lib.no_default,
) -> NDFrameT | None:
"""
Fill NA/NaN values using the specified method.
Expand Down Expand Up @@ -6293,6 +6293,8 @@ def fillna(
or the string 'infer' which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).

.. deprecated:: 1.4.0

Returns
-------
{klass} or None
Expand Down Expand Up @@ -6372,6 +6374,17 @@ def fillna(
inplace = validate_bool_kwarg(inplace, "inplace")
value, method = validate_fillna_kwargs(value, method)

if downcast is not lib.no_default:
warnings.warn(
f"{type(self).__name__}.fillna 'downcast' keyword is deprecated "
"and will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
pass
else:
downcast = None

self._consolidate_inplace()

# set the default here, so functions examining the signature
Expand Down Expand Up @@ -6435,7 +6448,11 @@ def fillna(
for k, v in value.items():
if k not in result:
continue
downcast_k = downcast if not is_dict else downcast.get(k)
downcast_k = (
downcast if not is_dict else downcast.get(k, lib.no_default)
)
if downcast_k is None:
downcast_k = lib.no_default
result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k)
return result if not inplace else None

Expand Down Expand Up @@ -6468,7 +6485,7 @@ def ffill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
downcast=None,
downcast=lib.no_default,
) -> NDFrameT | None:
"""
Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
Expand All @@ -6490,7 +6507,7 @@ def bfill(
axis: None | Axis = None,
inplace: bool_t = False,
limit: None | int = None,
downcast=None,
downcast=lib.no_default,
) -> NDFrameT | None:
"""
Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
Expand Down Expand Up @@ -9008,6 +9025,7 @@ def _where(
# make sure we are boolean
fill_value = bool(inplace)
cond = cond.fillna(fill_value)
cond = cond.infer_objects()

msg = "Boolean array expected for the condition, not {dtype}"

Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2735,6 +2735,13 @@ def fillna(self, value=None, downcast=None):
DataFrame.fillna : Fill NaN values of a DataFrame.
Series.fillna : Fill NaN Values of a Series.
"""
if downcast is not None:
warnings.warn(
"Index.fillna 'downcast' keyword is deprecated and will be "
"removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)

value = self._require_scalar(value)
if self.hasnans:
Expand Down
31 changes: 25 additions & 6 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def fillna(
if self._can_hold_element(value):
nb = self if inplace else self.copy()
putmask_inplace(nb.values, mask, value)
return nb._maybe_downcast([nb], downcast)
return nb._maybe_downcast([nb], downcast, deprecate=True)

if noop:
# we can't process the value, but nothing to do
Expand Down Expand Up @@ -515,17 +515,19 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
return res_blocks

@final
def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
def _maybe_downcast(
self, blocks: list[Block], downcast=None, deprecate: bool = False
) -> list[Block]:

if self.dtype == _dtype_obj:
# GH#44241 We downcast regardless of the argument;
# respecting 'downcast=None' may be worthwhile at some point,
# but ATM it breaks too much existing code.
# split and convert the blocks

return extend_blocks(
casted = extend_blocks(
[blk.convert(datetime=True, numeric=False) for blk in blocks]
)
return casted

if downcast is None:
return blocks
Expand All @@ -534,7 +536,19 @@ def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
# TODO: not reached, deprecate in favor of downcast=None
return blocks

return extend_blocks([b._downcast_2d(downcast) for b in blocks])
casted = extend_blocks([b._downcast_2d(downcast) for b in blocks])
if deprecate and not all(is_dtype_equal(x.dtype, self.dtype) for x in casted):
# i.e. we did *some* casting
warnings.warn(
"Casting behavior of .fillna, .interpolate, .ffill, .bfill "
f"for {self.dtype}-dtype columns is deprecated. In a future version, "
"these columns will not be automatically downcast. To retain "
"the old behavior, explicitly cast the resulting columns "
"to the desired dtype.",
FutureWarning,
stacklevel=find_stack_level(),
)
return casted

@final
@maybe_split
Expand Down Expand Up @@ -1077,7 +1091,7 @@ def interpolate(
)

nb = self.make_block_same_class(data)
return nb._maybe_downcast([nb], downcast)
return nb._maybe_downcast([nb], downcast, deprecate=True)

def take_nd(
self,
Expand Down Expand Up @@ -1808,6 +1822,11 @@ def fillna(
self, value, limit=None, inplace: bool = False, downcast=None
) -> list[Block]:

if not self.values._hasnans:
# Avoid possible upcast
# TODO: respect 'inplace' keyword
return self.copy()

if not self._can_hold_element(value) and self.dtype.kind != "m":
# We support filling a DatetimeTZ with a `value` whose timezone
# is different by coercing to object.
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
is_integer,
is_integer_dtype,
is_list_like,
is_nested_list_like,
Expand Down Expand Up @@ -201,26 +202,30 @@ def __internal_pivot_table(
to_unstack.append(i)
else:
to_unstack.append(name)
table = agged.unstack(to_unstack)

table = agged.unstack(to_unstack, fill_value=fill_value)

if not dropna:
if isinstance(table.index, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.index.levels), names=table.index.names
)
table = table.reindex(m, axis=0)
table = table.reindex(m, axis=0, fill_value=fill_value)

if isinstance(table.columns, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.columns.levels), names=table.columns.names
)
table = table.reindex(m, axis=1)
table = table.reindex(m, axis=1, fill_value=fill_value)

if isinstance(table, ABCDataFrame):
table = table.sort_index(axis=1)

if fill_value is not None:
table = table.fillna(fill_value, downcast="infer")
table = table.fillna(fill_value)
table = table.infer_objects()
if aggfunc is len and not observed and is_integer(fill_value):
table = table.astype(np.int64)

if margins:
if dropna:
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4876,7 +4876,7 @@ def fillna(
axis=None,
inplace=False,
limit=None,
downcast=None,
downcast=lib.no_default,
) -> Series | None:
return super().fillna(
value=value,
Expand Down Expand Up @@ -5475,7 +5475,7 @@ def ffill(
axis: None | Axis = None,
inplace: bool = False,
limit: None | int = None,
downcast=None,
downcast=lib.no_default,
) -> Series | None:
return super().ffill(axis, inplace, limit, downcast)

Expand All @@ -5485,7 +5485,7 @@ def bfill(
axis: None | Axis = None,
inplace: bool = False,
limit: None | int = None,
downcast=None,
downcast=lib.no_default,
) -> Series | None:
return super().bfill(axis, inplace, limit, downcast)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,8 +504,9 @@ def test_where_axis(self, using_array_manager):
tm.assert_frame_equal(result, expected)

warn = FutureWarning if using_array_manager else None
msg = "Downcasting integer-dtype"
expected = DataFrame([[0, np.nan], [0, np.nan]])
with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
with tm.assert_produces_warning(warn, match=msg):
result = df.where(mask, s, axis="columns")
tm.assert_frame_equal(result, expected)

Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,15 @@ def test_fillna_downcast(self):
# GH#15277
# infer int64 from float64
df = DataFrame({"a": [1.0, np.nan]})
result = df.fillna(0, downcast="infer")
with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
result = df.fillna(0, downcast="infer")
expected = DataFrame({"a": [1, 0]})
tm.assert_frame_equal(result, expected)

# infer int64 from float64 when fillna value is a dict
df = DataFrame({"a": [1.0, np.nan]})
result = df.fillna({"a": 0}, downcast="infer")
with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
result = df.fillna({"a": 0}, downcast="infer")
expected = DataFrame({"a": [1, 0]})
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -563,7 +565,9 @@ def test_fill_corner(self, float_frame, float_string_frame):
def test_fillna_downcast_dict(self):
# GH#40809
df = DataFrame({"col1": [1, np.nan]})
result = df.fillna({"col1": 2}, downcast={"col1": "int64"})
msg = "fillna 'downcast' keyword"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.fillna({"col1": 2}, downcast={"col1": "int64"})
expected = DataFrame({"col1": [1, 2]})
tm.assert_frame_equal(result, expected)

Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def test_interp_combo(self):
expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
tm.assert_series_equal(result, expected)

result = df["A"].interpolate(downcast="infer")
with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
result = df["A"].interpolate(downcast="infer")
expected = Series([1, 2, 3, 4], name="A")
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -160,7 +161,8 @@ def test_interp_alt_scipy(self):
expected.loc[5, "A"] = 6
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="barycentric", downcast="infer")
with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
result = df.interpolate(method="barycentric", downcast="infer")
tm.assert_frame_equal(result, expected.astype(np.int64))

result = df.interpolate(method="krogh")
Expand Down Expand Up @@ -280,7 +282,8 @@ def test_interp_inplace(self):
tm.assert_frame_equal(result, expected)

result = df.copy()
return_value = result["a"].interpolate(inplace=True, downcast="infer")
with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
return_value = result["a"].interpolate(inplace=True, downcast="infer")
assert return_value is None
tm.assert_frame_equal(result, expected.astype("int64"))

Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ def test_logical_with_nas(self):
expected = Series([True, True])
tm.assert_series_equal(result, expected)

result = d["a"].fillna(False, downcast=False) | d["b"]
msg = "Series.fillna 'downcast' keyword is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = d["a"].fillna(False, downcast=False) | d["b"]
expected = Series([True, True])
tm.assert_series_equal(result, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,8 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):

expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C")
if operation == "agg":
expected = expected.fillna(0, downcast="infer")
with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
expected = expected.fillna(0, downcast="infer")
grouped = df_cat.groupby(["A", "B"], observed=observed)["C"]
result = getattr(grouped, operation)(sum)
tm.assert_series_equal(result, expected)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,8 @@ def test_fillna(self, index):
msg = "does not support 'downcast'"
with pytest.raises(NotImplementedError, match=msg):
# For now at least, we only raise if there are NAs present
idx.fillna(idx[0], downcast="infer")
with tm.assert_produces_warning(FutureWarning):
idx.fillna(idx[0], downcast="infer")

expected = np.array([False] * len(idx), dtype=bool)
expected[1] = True
Expand Down
Loading