Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,7 @@ instance method on each data group. This is pretty easy to do by passing lambda
functions:

.. ipython:: python
:okwarning:

grouped = df.groupby("A")
grouped.agg(lambda x: x.std())
Expand All @@ -1009,6 +1010,7 @@ arguments. Using a bit of metaprogramming cleverness, GroupBy now has the
ability to "dispatch" method calls to the groups:

.. ipython:: python
:okwarning:

grouped.std()

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ Deprecations
- Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
- Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Deprecated ignoring "nuisance columns" in :meth:`DataFrameGroupBy.agg`, :meth:`DataFrameGroupBy.apply`, and :meth:`DataFrameGroupBy.transform`; select valid columns before calling the method (:issue:`41475`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you make this a sub-section (ok to lump this in with the DataFrame reductions PR sub-section as well), e.g. just a note that we are now deprecating both of these similar things, with an example.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e.g. #41480

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated + green


.. ---------------------------------------------------------------------------

Expand Down
9 changes: 8 additions & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,14 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
output[i] = sgb.transform(wrapper)
except TypeError:
# e.g. trying to call nanmean with string values
pass
warnings.warn(
f"Dropping invalid columns in {type(self).__name__}.transform "
"is deprecated. In a future version, a TypeError will be raised. "
"Before calling .transform, select only columns which should be "
"valid for the transforming function.",
FutureWarning,
stacklevel=5,
)
else:
inds.append(i)

Expand Down
19 changes: 19 additions & 0 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class providing the base-class of operations.
Union,
cast,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -1269,6 +1270,14 @@ def _python_agg_general(self, func, *args, **kwargs):
# if this function is invalid for this dtype, we will ignore it.
result = self.grouper.agg_series(obj, f)
except TypeError:
warnings.warn(
f"Dropping invalid columns in {type(self).__name__}.agg "
"is deprecated. In a future version, a TypeError will be raised. "
"Before calling .agg, select only columns which should be "
"valid for the aggregating function.",
FutureWarning,
stacklevel=3,
)
continue

key = base.OutputKey(label=name, position=idx)
Expand Down Expand Up @@ -2825,6 +2834,16 @@ def _get_cythonized_result(
vals, inferences = pre_processing(vals)
except TypeError as err:
error_msg = str(err)
howstr = how.replace("group_", "")
warnings.warn(
"Dropping invalid columns in "
f"{type(self).__name__}.{howstr} is deprecated. "
"In a future version, a TypeError will be raised. "
f"Before calling .{howstr}, select only columns which "
"should be valid for the function.",
FutureWarning,
stacklevel=3,
)
continue
vals = vals.astype(cython_dtype, copy=False)
if needs_2d:
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ def func(ser):
else:
return ser.sum()

result = grouped.aggregate(func)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
result = grouped.aggregate(func)
exp_grouped = three_group.loc[:, three_group.columns != "C"]
expected = exp_grouped.groupby(["A", "B"]).aggregate(func)
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -1018,6 +1019,7 @@ def test_mangle_series_groupby(self):
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
def test_with_kwargs(self):
f1 = lambda x, y, b=1: x.sum() + y + b
f2 = lambda x, y, b=2: x.sum() + y * b
Expand Down
14 changes: 11 additions & 3 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ def test_agg_api():
def peak_to_peak(arr):
return arr.max() - arr.min()

expected = grouped.agg([peak_to_peak])
with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid", check_stacklevel=False
):
expected = grouped.agg([peak_to_peak])
expected.columns = ["data1", "data2"]
result = grouped.agg(peak_to_peak)

with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid", check_stacklevel=False
):
result = grouped.agg(peak_to_peak)
tm.assert_frame_equal(result, expected)


Expand Down Expand Up @@ -294,7 +301,8 @@ def raiseException(df):
raise TypeError("test")

with pytest.raises(TypeError, match="test"):
df.groupby(0).agg(raiseException)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
df.groupby(0).agg(raiseException)


def test_series_agg_multikey():
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,8 @@ def aggfun(ser):
else:
return ser.sum()

agged2 = df.groupby(keys).aggregate(aggfun)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
agged2 = df.groupby(keys).aggregate(aggfun)
assert len(agged2.columns) + 1 == len(df.columns)


Expand Down Expand Up @@ -1757,6 +1758,7 @@ def test_pivot_table_values_key_error():
@pytest.mark.parametrize(
"op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"]
)
@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
def test_empty_groupby(columns, keys, values, method, op, request):
# GH8093 & GH26411
override_dtype = None
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ def test_quantile_raises():
df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])

with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
df.groupby("key").quantile()
with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid columns"
):
df.groupby("key").quantile()


def test_quantile_out_of_bounds_q_raises():
Expand Down Expand Up @@ -236,7 +239,11 @@ def test_groupby_quantile_nullable_array(values, q):
@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
def test_groupby_quantile_skips_invalid_dtype(q):
df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
result = df.groupby("a").quantile(q)

warn = None if isinstance(q, list) else FutureWarning
with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
result = df.groupby("a").quantile(q)

expected = df.groupby("a")[["b"]].quantile(q)
tm.assert_frame_equal(result, expected)

Expand Down
43 changes: 33 additions & 10 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,9 @@ def test_transform_exclude_nuisance(df, duplicates):
grouped = df.groupby("A")

gbc = grouped["C"]
expected["C"] = gbc.transform(np.mean)
warn = FutureWarning if duplicates else None
with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
expected["C"] = gbc.transform(np.mean)
if duplicates:
# squeeze 1-column DataFrame down to Series
expected["C"] = expected["C"]["C"]
Expand All @@ -422,14 +424,16 @@ def test_transform_exclude_nuisance(df, duplicates):

expected["D"] = grouped["D"].transform(np.mean)
expected = DataFrame(expected)
result = df.groupby("A").transform(np.mean)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
result = df.groupby("A").transform(np.mean)

tm.assert_frame_equal(result, expected)


def test_transform_function_aliases(df):
result = df.groupby("A").transform("mean")
expected = df.groupby("A").transform(np.mean)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
result = df.groupby("A").transform("mean")
expected = df.groupby("A").transform(np.mean)
tm.assert_frame_equal(result, expected)

result = df.groupby("A")["C"].transform("mean")
Expand Down Expand Up @@ -498,7 +502,10 @@ def test_groupby_transform_with_int():
}
)
with np.errstate(all="ignore"):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid columns"
):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
expected = DataFrame(
{"B": np.nan, "C": Series([-1, 0, 1, -1, 0, 1], dtype="float64")}
)
Expand All @@ -514,15 +521,21 @@ def test_groupby_transform_with_int():
}
)
with np.errstate(all="ignore"):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid columns"
):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
expected = DataFrame({"B": np.nan, "C": [-1.0, 0.0, 1.0, -1.0, 0.0, 1.0]})
tm.assert_frame_equal(result, expected)

# int that needs float conversion
s = Series([2, 3, 4, 10, 5, -1])
df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
with np.errstate(all="ignore"):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
with tm.assert_produces_warning(
FutureWarning, match="Dropping invalid columns"
):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())

s1 = s.iloc[0:3]
s1 = (s1 - s1.mean()) / s1.std()
Expand All @@ -532,7 +545,8 @@ def test_groupby_transform_with_int():
tm.assert_frame_equal(result, expected)

# int doesn't get downcasted
result = df.groupby("A").transform(lambda x: x * 2 / 2)
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
result = df.groupby("A").transform(lambda x: x * 2 / 2)
expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]})
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -791,7 +805,11 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request):
{"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)}
)

result = df.groupby("b")[cols].transform(agg_func)
warn = FutureWarning
if isinstance(exp, Series) or agg_func != "size":
warn = None
with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
result = df.groupby("b")[cols].transform(agg_func)

if agg_func == "rank":
exp = exp.astype("float")
Expand Down Expand Up @@ -1103,7 +1121,12 @@ def test_transform_agg_by_name(request, reduction_func, obj):

args = {"nth": [0], "quantile": [0.5], "corrwith": [obj]}.get(func, [])

result = g.transform(func, *args)
warn = None
if isinstance(obj, DataFrame) and func == "size":
warn = FutureWarning

with tm.assert_produces_warning(warn, match="Dropping invalid columns"):
result = g.transform(func, *args)

# this is the *definition* of a transformation
tm.assert_index_equal(result.index, obj.index)
Expand Down