diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f907e89880d25..3717e9b011f1c 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1297,6 +1297,7 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.resample` raises ``KeyError`` when getting the result from a key list when resampling on time index (:issue:`50840`) - Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"ngroup"`` argument (:issue:`45986`) - Bug in :meth:`.DataFrameGroupBy.describe` produced incorrect results when data had duplicate columns (:issue:`50806`) +- Bug in :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` failing to respect ``as_index=False`` (:issue:`51228`) - Reshaping diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 613e23fab0497..86ca486e49a32 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1269,7 +1269,11 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) data, func, *args, engine_kwargs=engine_kwargs, **kwargs ) index = self.grouper.result_index - return self.obj._constructor(result, index=index, columns=data.columns) + result = self.obj._constructor(result, index=index, columns=data.columns) + if not self.as_index: + result = self._insert_inaxis_grouper(result) + result.index = default_index(len(result)) + return result relabeling, func, columns, order = reconstruct_func(func, **kwargs) func = maybe_mangle_lambdas(func) diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index 0b2fb56a02006..9dd3d1d45abf0 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -51,7 +51,8 @@ def incorrect_function(values, index): # Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) -def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): +@pytest.mark.parametrize("as_index", [True, False]) +def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython, as_index): def func_numba(values, index): return np.mean(values) * 2.7 @@ -65,7 +66,7 @@ def func_numba(values, index): {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] ) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - grouped = data.groupby(0) + grouped = data.groupby(0, as_index=as_index) if pandas_obj == "Series": grouped = grouped[1] diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 2b70d7325a209..0264d2a09778f 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -48,7 +48,8 @@ def incorrect_function(values, index): # Filter warnings when parallel=True and the function can't be parallelized by Numba @pytest.mark.parametrize("jit", [True, False]) @pytest.mark.parametrize("pandas_obj", ["Series", "DataFrame"]) -def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython): +@pytest.mark.parametrize("as_index", [True, False]) +def test_numba_vs_cython(jit, pandas_obj, nogil, parallel, nopython, as_index): def func(values, index): return values + 1 @@ -62,7 +63,7 @@ def func(values, index): {0: ["a", "a", "b", "b", "a"], 1: [1.0, 2.0, 3.0, 4.0, 5.0]}, columns=[0, 1] ) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - grouped = data.groupby(0) + grouped = data.groupby(0, as_index=as_index) if pandas_obj == "Series": grouped = grouped[1]