diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 5f377276be480..7d40fb985a593 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -149,4 +149,6 @@ def re_replacer(s): if mask is None: values[:] = f(values) else: + if values.ndim != mask.ndim: + mask = np.broadcast_to(mask, values.shape) values[mask] = f(values[mask]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5be83aa38011b..452c919449ec4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -929,7 +929,7 @@ def replace( blocks = blk.convert( copy=False, using_cow=using_cow, - convert_string=convert_string or self.dtype != _dtype_obj, + convert_string=convert_string or self.dtype == "string", ) if len(blocks) > 1 or blocks[0].dtype != blk.dtype: warnings.warn( @@ -987,7 +987,7 @@ def _replace_regex( inplace: bool = False, mask=None, using_cow: bool = False, - convert_string: bool = True, + convert_string=None, already_warned=None, ) -> list[Block]: """ @@ -1048,10 +1048,18 @@ def _replace_regex( already_warned.warned_already = True nbs = block.convert( - copy=False, using_cow=using_cow, convert_string=convert_string + copy=False, + using_cow=using_cow, + convert_string=convert_string or self.dtype == "string", ) opt = get_option("future.no_silent_downcasting") - if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt: + if ( + len(nbs) > 1 + or ( + nbs[0].dtype != block.dtype + and not (self.dtype == "string" and nbs[0].dtype == "string") + ) + ) and not opt: warnings.warn( # GH#54710 "Downcasting behavior in `replace` is deprecated and " @@ -1088,7 +1096,7 @@ def replace_list( values._replace(to_replace=src_list, value=dest_list, inplace=True) return [blk] - convert_string = self.dtype != _dtype_obj + convert_string = self.dtype == "string" # Exclude anything that we know we won't contain pairs = [ @@ -2167,6 +2175,13 @@ def where( if isinstance(self.dtype, (IntervalDtype, StringDtype)): # TestSetitemFloatIntervalWithIntIntervalValues blk = self.coerce_to_target_dtype(orig_other) + if ( + self.ndim == 2 + and isinstance(orig_cond, np.ndarray) + and orig_cond.ndim == 1 + and not is_1d_only_ea_dtype(blk.dtype) + ): + orig_cond = orig_cond[:, None] nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) return self._maybe_downcast( nbs, downcast=_downcast, using_cow=using_cow, caller="where" diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 9844122dc4b2d..c0fc72768e27f 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td from pandas import ( @@ -91,8 +89,6 @@ def test_fillna_datetime(self, datetime_frame): with pytest.raises(ValueError, match=msg): datetime_frame.fillna(5, method="ffill") - # TODO(infer_string) test as actual error instead of xfail - @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string") def test_fillna_mixed_type(self, float_string_frame): mf = float_string_frame mf.loc[mf.index[5:20], "foo"] = np.nan @@ -126,7 +122,7 @@ def test_fillna_empty(self, using_copy_on_write): df.x.fillna(method=m, inplace=True) df.x.fillna(method=m) - def test_fillna_different_dtype(self, using_infer_string): + def test_fillna_different_dtype(self): # with different dtype (GH#3386) df = DataFrame( [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]] @@ -136,6 +132,7 @@ def test_fillna_different_dtype(self, using_infer_string): expected = DataFrame( [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]] ) + # column is originally float (all-NaN) -> filling with string gives object dtype expected[2] = expected[2].astype("object") tm.assert_frame_equal(result, expected) @@ -654,18 +651,10 @@ def test_fillna_col_reordering(self): filled = df.fillna(method="ffill") assert df.columns.tolist() == filled.columns.tolist() - # TODO(infer_string) test as actual error instead of xfail - @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string") - def test_fill_corner(self, float_frame, float_string_frame): - mf = float_string_frame - mf.loc[mf.index[5:20], "foo"] = np.nan - mf.loc[mf.index[-10:], "A"] = np.nan - - filled = float_string_frame.fillna(value=0) - assert (filled.loc[filled.index[5:20], "foo"] == 0).all() - del float_string_frame["foo"] - - float_frame.reindex(columns=[]).fillna(value=0) + def test_fill_empty(self, float_frame): + df = float_frame.reindex(columns=[]) + result = df.fillna(value=0) + tm.assert_frame_equal(result, df) def test_fillna_downcast_dict(self): # GH#40809 diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 2ee878893ce70..0971fb7e604c0 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -30,7 +28,6 @@ def mix_abc() -> dict[str, list[float | str]]: class TestDataFrameReplace: - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_inplace(self, datetime_frame, float_string_frame): datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan @@ -46,7 +43,9 @@ def test_replace_inplace(self, datetime_frame, float_string_frame): mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan result = float_string_frame.replace(np.nan, 0) - expected = float_string_frame.fillna(value=0) + expected = float_string_frame.copy() + expected["foo"] = expected["foo"].astype(object) + expected = expected.fillna(value=0) tm.assert_frame_equal(result, expected) tsframe = datetime_frame.copy() @@ -290,34 +289,39 @@ def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype): tm.assert_frame_equal(result, expected) def test_regex_replace_dict_nested_gh4115(self): - df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) + df = DataFrame( + {"Type": Series(["Q", "T", "Q", "Q", "T"], dtype=object), "tmp": 2} + ) expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) msg = "Downcasting behavior in `replace`" with tm.assert_produces_warning(FutureWarning, match=msg): result = df.replace({"Type": {"Q": 0, "T": 1}}) + tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") - def test_regex_replace_list_to_scalar(self, mix_abc): + def test_regex_replace_list_to_scalar(self, mix_abc, using_infer_string): df = DataFrame(mix_abc) expec = DataFrame( { "a": mix_abc["a"], - "b": np.array([np.nan] * 4), + "b": [np.nan] * 4, "c": [np.nan, np.nan, np.nan, "d"], } ) + if using_infer_string: + expec["b"] = expec["b"].astype("str") msg = "Downcasting behavior in `replace`" - with tm.assert_produces_warning(FutureWarning, match=msg): + warn = None if using_infer_string else FutureWarning + with tm.assert_produces_warning(warn, match=msg): res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True) res2 = df.copy() res3 = df.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): return_value = res2.replace( [r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True ) assert return_value is None - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): return_value = res3.replace( regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True ) @@ -326,7 +330,6 @@ def test_regex_replace_list_to_scalar(self, mix_abc): tm.assert_frame_equal(res2, expec) tm.assert_frame_equal(res3, expec) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_regex_replace_str_to_numeric(self, mix_abc): # what happens when you try to replace a numeric value with a regex? df = DataFrame(mix_abc) @@ -342,7 +345,6 @@ def test_regex_replace_str_to_numeric(self, mix_abc): tm.assert_frame_equal(res2, expec) tm.assert_frame_equal(res3, expec) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_regex_replace_regex_list_to_numeric(self, mix_abc): df = DataFrame(mix_abc) res = df.replace([r"\s*\.\s*", "b"], 0, regex=True) @@ -539,21 +541,28 @@ def test_replace_convert(self): res = rep.dtypes tm.assert_series_equal(expec, res) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") def test_replace_mixed(self, float_string_frame): mf = float_string_frame mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan result = float_string_frame.replace(np.nan, -18) - expected = float_string_frame.fillna(value=-18) + expected = float_string_frame.copy() + expected["foo"] = expected["foo"].astype(object) + expected = expected.fillna(value=-18) tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame) + expected2 = float_string_frame.copy() + expected2["foo"] = expected2["foo"].astype(object) + tm.assert_frame_equal(result.replace(-18, np.nan), expected2) result = float_string_frame.replace(np.nan, -1e8) - expected = float_string_frame.fillna(value=-1e8) + expected = float_string_frame.copy() + expected["foo"] = expected["foo"].astype(object) + expected = expected.fillna(value=-1e8) tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + expected2 = float_string_frame.copy() + expected2["foo"] = expected2["foo"].astype(object) + tm.assert_frame_equal(result.replace(-1e8, np.nan), expected2) def test_replace_mixed_int_block_upcasting(self): # int block upcasting @@ -614,15 +623,11 @@ def test_replace_mixed2(self, using_infer_string): expected = DataFrame( { - "A": Series(["foo", "bar"]), + "A": Series(["foo", "bar"], dtype="object"), "B": Series([0, "foo"], dtype="object"), } ) - if using_infer_string: - with tm.assert_produces_warning(FutureWarning, match="Downcasting"): - result = df.replace([1, 2], ["foo", "bar"]) - else: - result = df.replace([1, 2], ["foo", "bar"]) + result = df.replace([1, 2], ["foo", "bar"]) tm.assert_frame_equal(result, expected) def test_replace_mixed3(self): @@ -931,7 +936,7 @@ def test_replace_limit(self): # TODO pass - def test_replace_dict_no_regex(self): + def test_replace_dict_no_regex(self, any_string_dtype): answer = Series( { 0: "Strongly Agree", @@ -939,7 +944,8 @@ def test_replace_dict_no_regex(self): 2: "Neutral", 3: "Disagree", 4: "Strongly Disagree", - } + }, + dtype=any_string_dtype, ) weights = { "Agree": 4, @@ -954,7 +960,7 @@ def test_replace_dict_no_regex(self): result = answer.replace(weights) tm.assert_series_equal(result, expected) - def test_replace_series_no_regex(self): + def test_replace_series_no_regex(self, any_string_dtype): answer = Series( { 0: "Strongly Agree", @@ -962,7 +968,8 @@ def test_replace_series_no_regex(self): 2: "Neutral", 3: "Disagree", 4: "Strongly Disagree", - } + }, + dtype=any_string_dtype, ) weights = Series( { @@ -1060,16 +1067,15 @@ def test_nested_dict_overlapping_keys_replace_str(self): expected = df.replace({"a": dict(zip(astr, bstr))}) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="can't set float into string") - def test_replace_swapping_bug(self, using_infer_string): + def test_replace_swapping_bug(self): df = DataFrame({"a": [True, False, True]}) res = df.replace({"a": {True: "Y", False: "N"}}) - expect = DataFrame({"a": ["Y", "N", "Y"]}) + expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object) tm.assert_frame_equal(res, expect) df = DataFrame({"a": [0, 1, 0]}) res = df.replace({"a": {0: "Y", 1: "N"}}) - expect = DataFrame({"a": ["Y", "N", "Y"]}) + expect = DataFrame({"a": ["Y", "N", "Y"]}, dtype=object) tm.assert_frame_equal(res, expect) def test_replace_period(self): @@ -1345,7 +1351,7 @@ def test_replace_commutative(self, df, to_replace, exp): ) def test_replace_replacer_dtype(self, replacer): # GH26632 - df = DataFrame(["a"]) + df = DataFrame(["a"], dtype=object) msg = "Downcasting behavior in `replace` " with tm.assert_produces_warning(FutureWarning, match=msg): result = df.replace({"a": replacer, "b": replacer}) @@ -1462,6 +1468,7 @@ def test_replace_value_category_type(self): input_df = input_df.replace("obj1", "obj9") result = input_df.replace("cat2", "catX") + result = result.astype({"col1": "int64", "col3": "float64", "col5": "str"}) tm.assert_frame_equal(result, expected) def test_replace_dict_category_type(self): @@ -1503,13 +1510,11 @@ def test_replace_with_compiled_regex(self): expected = DataFrame(["z", "b", "c"]) tm.assert_frame_equal(result, expected) - def test_replace_intervals(self, using_infer_string): + def test_replace_intervals(self): # https://github.com/pandas-dev/pandas/issues/35931 df = DataFrame({"a": [pd.Interval(0, 1), pd.Interval(0, 1)]}) - warning = FutureWarning if using_infer_string else None - with tm.assert_produces_warning(warning, match="Downcasting"): - result = df.replace({"a": {pd.Interval(0, 1): "x"}}) - expected = DataFrame({"a": ["x", "x"]}) + result = df.replace({"a": {pd.Interval(0, 1): "x"}}) + expected = DataFrame({"a": ["x", "x"]}, dtype=object) tm.assert_frame_equal(result, expected) def test_replace_unicode(self): diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 4e1697eabf734..ecc640cfd0571 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -856,7 +856,7 @@ def test_replace_series(self, how, to_key, from_key, replacer, using_infer_strin else: exp = pd.Series(self.rep[to_key], index=index, name="yyy") - if using_infer_string and exp.dtype == "string" and obj.dtype == object: + if using_infer_string and exp.dtype == "string": # with infer_string, we disable the deprecated downcasting behavior exp = exp.astype(object) @@ -889,8 +889,9 @@ def test_replace_series_datetime_tz( assert obj.dtype == from_key exp = pd.Series(self.rep[to_key], index=index, name="yyy") - if using_infer_string and to_key == "object": - assert exp.dtype == "string" + if using_infer_string and exp.dtype == "string": + # with infer_string, we disable the deprecated downcasting behavior + exp = exp.astype(object) else: assert exp.dtype == to_key