From 2b04a61ab788284dacc56f14aa3984ed9648f301 Mon Sep 17 00:00:00 2001 From: Olivier Cavadenti Date: Thu, 7 Oct 2021 22:23:16 +0200 Subject: [PATCH 1/5] TST: add test for groupby aggregation dtype --- pandas/tests/groupby/aggregate/test_aggregate.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2c798e543bf6b..a11ad6ffb656f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1327,6 +1327,16 @@ def func(ser): tm.assert_frame_equal(res, expected) +def test_groupby_aggregate_directory(): + # GH#32793 + df = pd.DataFrame([[0, 1], [0, np.nan]]).convert_dtypes() + df_agg_last = df.groupby(0).agg('last') + df_agg_last_dir = df.groupby(0).agg({1: 'last'}) + + tm.assert_frame_equal(df_agg_last, df_agg_last_dir) + tm.assert_series_equal(df_agg_last.dtypes, df_agg_last_dir.dtypes) + + def test_group_mean_timedelta_nat(): # GH43132 data = Series(["1 day", "3 days", "NaT"], dtype="timedelta64[ns]") From 7588529b0beacd428ce5a0bbd3b59f01cfdc274d Mon Sep 17 00:00:00 2001 From: Olivier Cavadenti Date: Thu, 7 Oct 2021 22:55:32 +0200 Subject: [PATCH 2/5] fix pre-commit --- pandas/tests/groupby/aggregate/test_aggregate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index a11ad6ffb656f..3393415fc3f72 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1329,9 +1329,9 @@ def func(ser): def test_groupby_aggregate_directory(): # GH#32793 - df = pd.DataFrame([[0, 1], [0, np.nan]]).convert_dtypes() - df_agg_last = df.groupby(0).agg('last') - df_agg_last_dir = df.groupby(0).agg({1: 'last'}) + df = DataFrame([[0, 1], [0, np.nan]]).convert_dtypes() + df_agg_last = df.groupby(0).agg("last") + df_agg_last_dir = 
df.groupby(0).agg({1: "last"}) tm.assert_frame_equal(df_agg_last, df_agg_last_dir) tm.assert_series_equal(df_agg_last.dtypes, df_agg_last_dir.dtypes) From 7b0cb1ba02a8925b064e3721972bda3d9a9ce2fa Mon Sep 17 00:00:00 2001 From: Olivier Cavadenti Date: Fri, 8 Oct 2021 10:46:01 +0200 Subject: [PATCH 3/5] add reduction_func parameter --- .../tests/groupby/aggregate/test_aggregate.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3393415fc3f72..d79889a94ba88 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1327,14 +1327,21 @@ def func(ser): tm.assert_frame_equal(res, expected) -def test_groupby_aggregate_directory(): +def test_groupby_aggregate_directory(reduction_func): # GH#32793 - df = DataFrame([[0, 1], [0, np.nan]]).convert_dtypes() - df_agg_last = df.groupby(0).agg("last") - df_agg_last_dir = df.groupby(0).agg({1: "last"}) + if reduction_func not in ["corrwith", "nth"]: + obj = DataFrame([[0, 1], [0, np.nan]]) - tm.assert_frame_equal(df_agg_last, df_agg_last_dir) - tm.assert_series_equal(df_agg_last.dtypes, df_agg_last_dir.dtypes) + obj.convert_dtypes() + df_agg_last = obj.groupby(0).agg(reduction_func) + df_agg_last_dir = obj.groupby(0).agg({1: reduction_func}) + + if reduction_func in ["size", "ngroup"]: + tm.assert_equal(df_agg_last.values, df_agg_last_dir[1].values) + assert df_agg_last.dtypes == df_agg_last_dir[1].dtypes + else: + tm.assert_frame_equal(df_agg_last, df_agg_last_dir) + tm.assert_series_equal(df_agg_last.dtypes, df_agg_last_dir.dtypes) def test_group_mean_timedelta_nat(): From 326eeb394e84912ace036050d04534c283f40322 Mon Sep 17 00:00:00 2001 From: Olivier Cavadenti Date: Wed, 13 Oct 2021 19:26:09 +0200 Subject: [PATCH 4/5] rename + change assert --- pandas/tests/groupby/aggregate/test_aggregate.py | 16 ++++++++++------ 1 file 
changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index d79889a94ba88..889e199129b38 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1333,15 +1333,19 @@ def test_groupby_aggregate_directory(reduction_func): obj = DataFrame([[0, 1], [0, np.nan]]) obj.convert_dtypes() - df_agg_last = obj.groupby(0).agg(reduction_func) - df_agg_last_dir = obj.groupby(0).agg({1: reduction_func}) + result_reduced_series = obj.groupby(0).agg(reduction_func) + result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) if reduction_func in ["size", "ngroup"]: - tm.assert_equal(df_agg_last.values, df_agg_last_dir[1].values) - assert df_agg_last.dtypes == df_agg_last_dir[1].dtypes + # names are different: None / 1 + tm.assert_series_equal( + result_reduced_series, result_reduced_frame[1], check_names=False + ) else: - tm.assert_frame_equal(df_agg_last, df_agg_last_dir) - tm.assert_series_equal(df_agg_last.dtypes, df_agg_last_dir.dtypes) + tm.assert_frame_equal(result_reduced_series, result_reduced_frame) + tm.assert_series_equal( + result_reduced_series.dtypes, result_reduced_frame.dtypes + ) def test_group_mean_timedelta_nat(): From 829f4e80744ac8f47b8f3a21ee39965c069de32b Mon Sep 17 00:00:00 2001 From: Olivier Cavadenti Date: Thu, 28 Oct 2021 22:57:08 +0200 Subject: [PATCH 5/5] TST: use early return and drop unused convert_dtypes call --- .../tests/groupby/aggregate/test_aggregate.py | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 889e199129b38..f178f85154319 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1329,23 +1329,24 @@ def func(ser): def test_groupby_aggregate_directory(reduction_func): # GH#32793 - if reduction_func not in 
["corrwith", "nth"]: - obj = DataFrame([[0, 1], [0, np.nan]]) - - obj.convert_dtypes() - result_reduced_series = obj.groupby(0).agg(reduction_func) - result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) - - if reduction_func in ["size", "ngroup"]: - # names are different: None / 1 - tm.assert_series_equal( - result_reduced_series, result_reduced_frame[1], check_names=False - ) - else: - tm.assert_frame_equal(result_reduced_series, result_reduced_frame) - tm.assert_series_equal( - result_reduced_series.dtypes, result_reduced_frame.dtypes - ) + if reduction_func in ["corrwith", "nth"]: + return None + + obj = DataFrame([[0, 1], [0, np.nan]]) + + result_reduced_series = obj.groupby(0).agg(reduction_func) + result_reduced_frame = obj.groupby(0).agg({1: reduction_func}) + + if reduction_func in ["size", "ngroup"]: + # names are different: None / 1 + tm.assert_series_equal( + result_reduced_series, result_reduced_frame[1], check_names=False + ) + else: + tm.assert_frame_equal(result_reduced_series, result_reduced_frame) + tm.assert_series_equal( + result_reduced_series.dtypes, result_reduced_frame.dtypes + ) def test_group_mean_timedelta_nat():