From 2ab64714f53618ad2c3c539f0ae97d89c34e3a17 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 13 Jun 2023 00:10:24 +0800 Subject: [PATCH 1/5] FIX groupby sum turning inf into nan --- pandas/_libs/groupby.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 61f448cbe0c3f..0baae23a4a71c 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -746,6 +746,13 @@ def group_sum( y = val - compensation[lab, j] t = sumx[lab, j] + y compensation[lab, j] = t - sumx[lab, j] - y + if compensation[lab, j] != compensation[lab, j]: + # GH#53606 + # If val is +/- infinity compensation is NaN + # which would lead to results being NaN instead + # of +/- infinity. We cannot use util.is_nan + # because of no gil + compensation[lab, j] = 0 sumx[lab, j] = t _check_below_mincount( From c8184e9236933d2d8c82331dc539cf3e99def792 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 13 Jun 2023 00:52:43 +0800 Subject: [PATCH 2/5] non-regression test --- pandas/tests/groupby/test_libgroupby.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index d10bcf9053d1a..615611c968f5f 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -6,6 +6,7 @@ group_cumprod, group_cumsum, group_mean, + group_sum, group_var, ) @@ -302,3 +303,20 @@ def test_cython_group_mean_Inf_at_begining_and_end(): actual, expected, ) + + +def test_cython_group_sum_Inf_at_begining_and_end(): + # GH #53606 + actual = np.array([[np.nan], [np.nan]], dtype="float64") + counts = np.array([0, 0], dtype="int64") + data = np.array([[np.inf], [np.inf], [np.inf]], dtype="float64") + labels = np.array([0, 1, 1], dtype=np.intp) + + group_sum(actual, counts, data, labels, None, is_datetimelike=False) + + expected = np.array([[np.inf], [np.inf]], dtype="float64") + + tm.assert_numpy_array_equal( + actual, 
+ expected, + ) From 08e708a99a20b9a35e4cbf7ad8df03940da866ed Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 13 Jun 2023 01:06:00 +0800 Subject: [PATCH 3/5] changelog added --- doc/source/whatsnew/v2.1.0.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index baacc8c421414..1b92175831dd3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -454,8 +454,9 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`) - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) -- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`) -- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`) +- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`) +- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`) +- Bug in :meth:`SeriesGroupBy.sum` and :meth:`DataFrameGroupby.sum` summing ``np.inf`` to ``np.nan`` (:issue:`53606`) Reshaping ^^^^^^^^^ From 8d3ab090c63be646a528469235675733611d0ae4 Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 13 Jun 2023 11:18:50 +0800 
Subject: [PATCH 4/5] parametrize tests and updated changelog --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/tests/groupby/test_libgroupby.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1b92175831dd3..1af1343d76d7d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -456,7 +456,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`) - Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`) - Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`) -- Bug in :meth:`SeriesGroupBy.sum` and :meth:`DataFrameGroupby.sum` summing ``np.inf`` to ``np.nan`` (:issue:`53606`) +- Bug in :meth:`SeriesGroupBy.sum` and :meth:`DataFrameGroupBy.sum` summing ``np.inf + np.inf`` and ``(-np.inf) + (-np.inf)`` to ``np.nan`` (:issue:`53606`) Reshaping ^^^^^^^^^ diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index 615611c968f5f..92c3b68d87fad 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -305,16 +305,25 @@ def test_cython_group_mean_Inf_at_begining_and_end(): ) -def test_cython_group_sum_Inf_at_begining_and_end(): +@pytest.mark.parametrize( + "values, out", + [ + ([[np.inf], [np.inf], [np.inf]], [[np.inf], [np.inf]]), + ([[np.inf], [np.inf], [-np.inf]], [[np.inf], [np.nan]]), + ([[np.inf], [-np.inf], [np.inf]], [[np.inf], [np.nan]]), + ([[np.inf], [-np.inf], [-np.inf]], [[np.inf], [-np.inf]]), + ], +) +def 
test_cython_group_sum_Inf_at_begining_and_end(values, out): # GH #53606 actual = np.array([[np.nan], [np.nan]], dtype="float64") counts = np.array([0, 0], dtype="int64") - data = np.array([[np.inf], [np.inf], [np.inf]], dtype="float64") + data = np.array(values, dtype="float64") labels = np.array([0, 1, 1], dtype=np.intp) group_sum(actual, counts, data, labels, None, is_datetimelike=False) - expected = np.array([[np.inf], [np.inf]], dtype="float64") + expected = np.array(out, dtype="float64") tm.assert_numpy_array_equal( actual, From e45c4bee9ad7cbb2af75a353e3f859a90f5a716e Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Tue, 13 Jun 2023 18:54:22 +0800 Subject: [PATCH 5/5] retrigger checks