pandas-dev · jbrockmendel · May 4, 2023 · May 4, 2023 · May 4, 2023 · May 4, 2023
diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py
@@ -24,11 +24,11 @@ def _compare_other(self, data, op, other):
         ser = pd.Series(data)
         result = op(ser, other)
 
-        expected = op(pd.Series(data._data), other)
+        # Set nullable dtype here to avoid upcasting when setting to pd.NA below
+        expected = op(pd.Series(data._data), other).astype("boolean")
 
         # fill the nan locations
         expected[data._mask] = pd.NA
-        expected = expected.astype("boolean")
 
         tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py
@@ -145,7 +145,7 @@ def test_mask_return_dtype():
 
 def test_mask_inplace_no_other():
     # GH#51685
-    df = DataFrame({"a": [1, 2], "b": ["x", "y"]})
+    df = DataFrame({"a": [1.0, 2.0], "b": ["x", "y"]})
     cond = DataFrame({"a": [True, False], "b": [False, True]})
     df.mask(cond, inplace=True)
     expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]})

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -987,7 +987,8 @@ def test_setitem_other_callable(self):
         def inc(x):
             return x + 1
 
-        df = DataFrame([[-1, 1], [1, -1]])
+        # Use object dtype from the start to avoid upcast when setting inc below
+        df = DataFrame([[-1, 1], [1, -1]], dtype=object)
         df[df > 0] = inc
 
         expected = DataFrame([[-1, inc], [inc, -1]])

diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py
@@ -79,7 +79,8 @@ def test_missing(self, date_range_frame):
         # GH 15118
         # no match found - `where` value before earliest date in index
         N = 10
-        df = date_range_frame.iloc[:N].copy()
+        # Cast to 'float64' to avoid upcast when introducing nan in df.asof
+        df = date_range_frame.iloc[:N].copy().astype("float64")
 
         result = df.asof("1989-12-31")
 

diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py
@@ -111,7 +111,8 @@ def test_map_na_ignore(float_frame):
     strlen_frame_na_ignore = float_frame_with_na.map(
         lambda x: len(str(x)), na_action="ignore"
     )
-    strlen_frame_with_na = strlen_frame.copy()
+    # Set float64 type to avoid upcast when setting NA below
+    strlen_frame_with_na = strlen_frame.copy().astype("float64")
     strlen_frame_with_na[mask] = pd.NA
     tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
 

diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py
@@ -327,9 +327,10 @@ def test_count_object():
 
 def test_count_cross_type():
     # GH8169
+    # Set float64 dtype to avoid upcast when setting nan below
     vals = np.hstack(
         (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2)))
-    )
+    ).astype("float64")
 
     df = DataFrame(vals, columns=["a", "b", "c", "d"])
     df[df == 2] = np.nan

diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
@@ -363,7 +363,8 @@ def test_filter_and_transform_with_non_unique_int_index():
     tm.assert_frame_equal(actual, expected)
 
     actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
-    expected = df.copy()
+    # Cast to float64 to avoid upcast when setting nan below
+    expected = df.copy().astype("float64")
     expected.iloc[[0, 3, 5, 6]] = np.nan
     tm.assert_frame_equal(actual, expected)
 
@@ -405,7 +406,8 @@ def test_filter_and_transform_with_multiple_non_unique_int_index():
     tm.assert_frame_equal(actual, expected)
 
     actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
-    expected = df.copy()
+    # Cast to float64 to avoid upcast when setting nan below
+    expected = df.copy().astype("float64")
     expected.iloc[[0, 3, 5, 6]] = np.nan
     tm.assert_frame_equal(actual, expected)
 
@@ -447,7 +449,8 @@ def test_filter_and_transform_with_non_unique_float_index():
     tm.assert_frame_equal(actual, expected)
 
     actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
-    expected = df.copy()
+    # Cast to float64 to avoid upcast when setting nan below
+    expected = df.copy().astype("float64")
     expected.iloc[[0, 3, 5, 6]] = np.nan
     tm.assert_frame_equal(actual, expected)
 
@@ -492,7 +495,8 @@ def test_filter_and_transform_with_non_unique_timestamp_index():
     tm.assert_frame_equal(actual, expected)
 
     actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
-    expected = df.copy()
+    # Cast to float64 to avoid upcast when setting nan below
+    expected = df.copy().astype("float64")
     expected.iloc[[0, 3, 5, 6]] = np.nan
     tm.assert_frame_equal(actual, expected)
 
@@ -534,7 +538,8 @@ def test_filter_and_transform_with_non_unique_string_index():
     tm.assert_frame_equal(actual, expected)
 
     actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False)
-    expected = df.copy()
+    # Cast to float64 to avoid upcast when setting nan below
+    expected = df.copy().astype("float64")
     expected.iloc[[0, 3, 5, 6]] = np.nan
     tm.assert_frame_equal(actual, expected)
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -166,7 +166,8 @@ def f_1(grp):
         return grp.iloc[0]
 
     result = df.groupby("A").apply(f_1)[["B"]]
-    e = expected.copy()
+    # Cast to float64 to avoid upcast when setting nan below
+    e = expected.copy().astype("float64")
     e.loc["Tiger"] = np.nan
     tm.assert_frame_equal(result, e)