diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5c04678cd5fa6..58494d2fcaa5f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7995,35 +7995,44 @@ def blk_func(values): out[:] = coerce_to_dtypes(out.values, df.dtypes) return out + if not self._is_homogeneous_type: + # try to avoid self.values call + + if filter_type is None and axis == 0 and len(self) > 0: + # operate column-wise + + # numeric_only must be None here, as other cases caught above + # require len(self) > 0 bc frame_apply messes up empty prod/sum + + # this can end up with a non-reduction + # but not always. if the types are mixed + # with datelike then need to make sure a series + + # we only end up here if we have not specified + # numeric_only and yet we have tried a + # column-by-column reduction, where we have mixed type. + # So let's just do what we can + from pandas.core.apply import frame_apply + + opa = frame_apply( + self, func=f, result_type="expand", ignore_failures=True + ) + result = opa.get_result() + if result.ndim == self.ndim: + result = result.iloc[0].rename(None) + return result + + data = self if numeric_only is None: data = self values = data.values + try: result = f(values) except TypeError: # e.g. in nanops trying to convert strs to float - # try by-column first - if filter_type is None and axis == 0: - # this can end up with a non-reduction - # but not always. if the types are mixed - # with datelike then need to make sure a series - - # we only end up here if we have not specified - # numeric_only and yet we have tried a - # column-by-column reduction, where we have mixed type. - # So let's just do what we can - from pandas.core.apply import frame_apply - - opa = frame_apply( - self, func=f, result_type="expand", ignore_failures=True - ) - result = opa.get_result() - if result.ndim == self.ndim: - result = result.iloc[0] - return result - # TODO: why doesnt axis matter here? data = _get_data(axis_matters=False) labels = data._get_agg_axis(axis) @@ -8031,6 +8040,7 @@ def blk_func(values): values = data.values with np.errstate(all="ignore"): result = f(values) + else: if numeric_only: data = _get_data(axis_matters=True) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 3a7df29ae9091..6525e93d89fce 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -346,7 +346,9 @@ def kurt(x): "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum ) assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True) - assert_stat_op_calc("product", np.prod, float_frame_with_na) + assert_stat_op_calc( + "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod + ) assert_stat_op_calc("mad", mad, float_frame_with_na) assert_stat_op_calc("var", var, float_frame_with_na) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index e4de749c5f5c5..7cb7115276f71 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -372,8 +372,12 @@ def test_fillna_categorical_nan(self): cat = Categorical([np.nan, 2, np.nan]) val = Categorical([np.nan, np.nan, np.nan]) df = DataFrame({"cats": cat, "vals": val}) - with tm.assert_produces_warning(RuntimeWarning): - res = df.fillna(df.median()) + + # GH#32950 df.median() is poorly behaved because there is no + # Categorical.median + median = Series({"cats": 2.0, "vals": np.nan}) + + res = df.fillna(median) v_exp = [np.nan, np.nan, np.nan] df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") tm.assert_frame_equal(res, df_exp)