pandas-dev · jbrockmendel · Dec 23, 2021 · Dec 29, 2021
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5207,7 +5207,7 @@ def fillna(
         axis: Axis | None = None,
         inplace: bool = False,
         limit=None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> DataFrame | None:
         return super().fillna(
             value=value,
@@ -10862,7 +10862,7 @@ def ffill(
         axis: None | Axis = None,
         inplace: bool = False,
         limit: None | int = None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> DataFrame | None:
         return super().ffill(axis, inplace, limit, downcast)
 
@@ -10872,7 +10872,7 @@ def bfill(
         axis: None | Axis = None,
         inplace: bool = False,
         limit: None | int = None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> DataFrame | None:
         return super().bfill(axis, inplace, limit, downcast)
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6258,7 +6258,7 @@ def fillna(
         axis=None,
         inplace: bool_t = False,
         limit=None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> NDFrameT | None:
         """
         Fill NA/NaN values using the specified method.
@@ -6293,6 +6293,8 @@ def fillna(
             or the string 'infer' which will try to downcast to an appropriate
             equal type (e.g. float64 to int64 if possible).
 
+            .. deprecated:: 1.4.0
+
         Returns
         -------
         {klass} or None
@@ -6372,6 +6374,17 @@ def fillna(
         inplace = validate_bool_kwarg(inplace, "inplace")
         value, method = validate_fillna_kwargs(value, method)
 
+        if downcast is lib.no_default:
+            # keyword not passed: use the pre-deprecation default silently
+            downcast = None
+        else:
+            warnings.warn(
+                f"{type(self).__name__}.fillna 'downcast' keyword is deprecated "
+                "and will be removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+
         self._consolidate_inplace()
 
         # set the default here, so functions examining the signaure
@@ -6435,7 +6448,11 @@ def fillna(
                 for k, v in value.items():
                     if k not in result:
                         continue
-                    downcast_k = downcast if not is_dict else downcast.get(k)
+                    downcast_k = (
+                        downcast if not is_dict else downcast.get(k, lib.no_default)
+                    )
+                    if downcast_k is None:
+                        downcast_k = lib.no_default
                     result[k] = result[k].fillna(v, limit=limit, downcast=downcast_k)
                 return result if not inplace else None
 
@@ -6468,7 +6485,7 @@ def ffill(
         axis: None | Axis = None,
         inplace: bool_t = False,
         limit: None | int = None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> NDFrameT | None:
         """
         Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
@@ -6490,7 +6507,7 @@ def bfill(
         axis: None | Axis = None,
         inplace: bool_t = False,
         limit: None | int = None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> NDFrameT | None:
         """
         Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
@@ -9008,6 +9025,7 @@ def _where(
         # make sure we are boolean
         fill_value = bool(inplace)
         cond = cond.fillna(fill_value)
+        cond = cond.infer_objects()
 
         msg = "Boolean array expected for the condition, not {dtype}"
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2735,6 +2735,13 @@ def fillna(self, value=None, downcast=None):
         DataFrame.fillna : Fill NaN values of a DataFrame.
         Series.fillna : Fill NaN Values of a Series.
         """
+        if downcast is not None:
+            warnings.warn(
+                "Index.fillna 'downcast' keyword is deprecated and will be "
+                "removed in a future version.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
 
         value = self._require_scalar(value)
         if self.hasnans:

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -458,7 +458,7 @@ def fillna(
         if self._can_hold_element(value):
             nb = self if inplace else self.copy()
             putmask_inplace(nb.values, mask, value)
-            return nb._maybe_downcast([nb], downcast)
+            return nb._maybe_downcast([nb], downcast, deprecate=True)
 
         if noop:
             # we can't process the value, but nothing to do
@@ -515,17 +515,19 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
         return res_blocks
 
     @final
-    def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
+    def _maybe_downcast(
+        self, blocks: list[Block], downcast=None, deprecate: bool = False
+    ) -> list[Block]:
 
         if self.dtype == _dtype_obj:
             # GH#44241 We downcast regardless of the argument;
             #  respecting 'downcast=None' may be worthwhile at some point,
             #  but ATM it breaks too much existing code.
             # split and convert the blocks
-
-            return extend_blocks(
+            casted = extend_blocks(
                 [blk.convert(datetime=True, numeric=False) for blk in blocks]
             )
+            return casted
 
         if downcast is None:
             return blocks
@@ -534,7 +536,19 @@ def _maybe_downcast(self, blocks: list[Block], downcast=None) -> list[Block]:
             # TODO: not reached, deprecate in favor of downcast=None
             return blocks
 
-        return extend_blocks([b._downcast_2d(downcast) for b in blocks])
+        casted = extend_blocks([b._downcast_2d(downcast) for b in blocks])
+        if deprecate and not all(is_dtype_equal(x.dtype, self.dtype) for x in casted):
+            # i.e. some returned block no longer has self.dtype: a cast happened
+            warnings.warn(
+                "Casting behavior of .fillna, .interpolate, .ffill, .bfill "
+                f"for {self.dtype}-dtype columns is deprecated. In a future version, "
+                "these columns will not be automatically downcast. To retain "
+                "the old behavior, explicitly cast the resulting columns "
+                "to the desired dtype.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+        return casted
 
     @final
     @maybe_split
@@ -1077,7 +1091,7 @@ def interpolate(
         )
 
         nb = self.make_block_same_class(data)
-        return nb._maybe_downcast([nb], downcast)
+        return nb._maybe_downcast([nb], downcast, deprecate=True)
 
     def take_nd(
         self,
@@ -1808,6 +1822,11 @@ def fillna(
         self, value, limit=None, inplace: bool = False, downcast=None
     ) -> list[Block]:
 
+        if not self.values._hasnans:
+            # Nothing to fill: return before the can-hold-element check below
+            #  to avoid a possible upcast.  TODO: respect 'inplace' keyword
+            return [self.copy()]
+
         if not self._can_hold_element(value) and self.dtype.kind != "m":
             # We support filling a DatetimeTZ with a `value` whose timezone
             #  is different by coercing to object.

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -23,6 +23,7 @@
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
+    is_integer,
     is_integer_dtype,
     is_list_like,
     is_nested_list_like,
@@ -201,26 +202,30 @@ def __internal_pivot_table(
                 to_unstack.append(i)
             else:
                 to_unstack.append(name)
-        table = agged.unstack(to_unstack)
+
+        table = agged.unstack(to_unstack, fill_value=fill_value)
 
     if not dropna:
         if isinstance(table.index, MultiIndex):
             m = MultiIndex.from_arrays(
                 cartesian_product(table.index.levels), names=table.index.names
             )
-            table = table.reindex(m, axis=0)
+            table = table.reindex(m, axis=0, fill_value=fill_value)
 
         if isinstance(table.columns, MultiIndex):
             m = MultiIndex.from_arrays(
                 cartesian_product(table.columns.levels), names=table.columns.names
             )
-            table = table.reindex(m, axis=1)
+            table = table.reindex(m, axis=1, fill_value=fill_value)
 
     if isinstance(table, ABCDataFrame):
         table = table.sort_index(axis=1)
 
     if fill_value is not None:
-        table = table.fillna(fill_value, downcast="infer")
+        table = table.fillna(fill_value)
+        table = table.infer_objects()
+        if aggfunc is len and not observed and is_integer(fill_value):
+            table = table.astype(np.int64)
 
     if margins:
         if dropna:

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4876,7 +4876,7 @@ def fillna(
         axis=None,
         inplace=False,
         limit=None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> Series | None:
         return super().fillna(
             value=value,
@@ -5475,7 +5475,7 @@ def ffill(
         axis: None | Axis = None,
         inplace: bool = False,
         limit: None | int = None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> Series | None:
         return super().ffill(axis, inplace, limit, downcast)
 
@@ -5485,7 +5485,7 @@ def bfill(
         axis: None | Axis = None,
         inplace: bool = False,
         limit: None | int = None,
-        downcast=None,
+        downcast=lib.no_default,
     ) -> Series | None:
         return super().bfill(axis, inplace, limit, downcast)
 

diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -504,8 +504,9 @@ def test_where_axis(self, using_array_manager):
         tm.assert_frame_equal(result, expected)
 
         warn = FutureWarning if using_array_manager else None
+        msg = "Downcasting integer-dtype"
         expected = DataFrame([[0, np.nan], [0, np.nan]])
-        with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
+        with tm.assert_produces_warning(warn, match=msg):
             result = df.where(mask, s, axis="columns")
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
@@ -234,13 +234,15 @@ def test_fillna_downcast(self):
         # GH#15277
         # infer int64 from float64
         df = DataFrame({"a": [1.0, np.nan]})
-        result = df.fillna(0, downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
+            result = df.fillna(0, downcast="infer")
         expected = DataFrame({"a": [1, 0]})
         tm.assert_frame_equal(result, expected)
 
         # infer int64 from float64 when fillna value is a dict
         df = DataFrame({"a": [1.0, np.nan]})
-        result = df.fillna({"a": 0}, downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
+            result = df.fillna({"a": 0}, downcast="infer")
         expected = DataFrame({"a": [1, 0]})
         tm.assert_frame_equal(result, expected)
 
@@ -563,7 +565,9 @@ def test_fill_corner(self, float_frame, float_string_frame):
     def test_fillna_downcast_dict(self):
         # GH#40809
         df = DataFrame({"col1": [1, np.nan]})
-        result = df.fillna({"col1": 2}, downcast={"col1": "int64"})
+        msg = "fillna 'downcast' keyword"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.fillna({"col1": 2}, downcast={"col1": "int64"})
         expected = DataFrame({"col1": [1, 2]})
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
@@ -96,7 +96,8 @@ def test_interp_combo(self):
         expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
         tm.assert_series_equal(result, expected)
 
-        result = df["A"].interpolate(downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
+            result = df["A"].interpolate(downcast="infer")
         expected = Series([1, 2, 3, 4], name="A")
         tm.assert_series_equal(result, expected)
 
@@ -160,7 +161,8 @@ def test_interp_alt_scipy(self):
         expected.loc[5, "A"] = 6
         tm.assert_frame_equal(result, expected)
 
-        result = df.interpolate(method="barycentric", downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
+            result = df.interpolate(method="barycentric", downcast="infer")
         tm.assert_frame_equal(result, expected.astype(np.int64))
 
         result = df.interpolate(method="krogh")
@@ -280,7 +282,8 @@ def test_interp_inplace(self):
         tm.assert_frame_equal(result, expected)
 
         result = df.copy()
-        return_value = result["a"].interpolate(inplace=True, downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
+            return_value = result["a"].interpolate(inplace=True, downcast="infer")
         assert return_value is None
         tm.assert_frame_equal(result, expected.astype("int64"))
 

diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py
@@ -165,7 +165,9 @@ def test_logical_with_nas(self):
         expected = Series([True, True])
         tm.assert_series_equal(result, expected)
 
-        result = d["a"].fillna(False, downcast=False) | d["b"]
+        msg = "Series.fillna 'downcast' keyword is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = d["a"].fillna(False, downcast=False) | d["b"]
         expected = Series([True, True])
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -1242,7 +1242,8 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
 
     expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C")
     if operation == "agg":
-        expected = expected.fillna(0, downcast="infer")
+        with tm.assert_produces_warning(FutureWarning, match="Casting behavior"):
+            expected = expected.fillna(0, downcast="infer")
     grouped = df_cat.groupby(["A", "B"], observed=observed)["C"]
     result = getattr(grouped, operation)(sum)
     tm.assert_series_equal(result, expected)

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -519,7 +519,8 @@ def test_fillna(self, index):
             msg = "does not support 'downcast'"
             with pytest.raises(NotImplementedError, match=msg):
                 # For now at least, we only raise if there are NAs present
-                idx.fillna(idx[0], downcast="infer")
+                with tm.assert_produces_warning(FutureWarning):
+                    idx.fillna(idx[0], downcast="infer")
 
             expected = np.array([False] * len(idx), dtype=bool)
             expected[1] = True