From d9e2f646e8e8588fdb0f75eaf6b01c1417d323b7 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Mon, 25 Jan 2021 17:49:58 -0800 Subject: [PATCH 01/13] TST:unxfail test for calling __finalize__ in DataFrame.pivot_table --- pandas/tests/generic/test_finalize.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 73a68e8508644..bb6766426a100 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -149,13 +149,10 @@ marks=not_implemented_mark, ), (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")), - pytest.param( - ( - pd.DataFrame, - {"A": [1], "B": [1]}, - operator.methodcaller("pivot_table", columns="A"), - ), - marks=not_implemented_mark, + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A"), ), (pd.DataFrame, frame_data, operator.methodcaller("stack")), pytest.param( From aaa09b55a26110d8b1dd4aef20a332e5acc26b2f Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Thu, 28 Jan 2021 17:21:54 -0800 Subject: [PATCH 02/13] TST:add tests for calling __finalize__ in DataFrame.groupby.mean and .median --- pandas/tests/generic/test_finalize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index bb6766426a100..0446d9f281fd9 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -737,6 +737,8 @@ def test_categorical_accessor(method): [ operator.methodcaller("sum"), lambda x: x.agg("sum"), + lambda x: x.agg("mean"), + lambda x: x.agg("median"), ], ) def test_groupby_finalize(obj, method): From 820089c9336d0b2db71672b01f2cf42887f36584 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Thu, 28 Jan 2021 17:24:22 -0800 Subject: [PATCH 03/13] TST:add xfail tests for calling __finalize__ in groupby operations --- pandas/tests/generic/test_finalize.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 0446d9f281fd9..0c33add4589ed 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -756,6 +756,12 @@ def test_groupby_finalize(obj, method): lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), lambda x: x.apply(lambda y: y), + lambda x: x.agg("std"), + lambda x: x.agg("var"), + lambda x: x.agg("sem"), + lambda x: x.agg("size"), + lambda x: x.agg("ohlc"), + lambda x: x.agg("describe"), ], ) @not_implemented_mark From 02a777cbc4950b193c1058575939d4cb9dbba894 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Thu, 28 Jan 2021 17:27:56 -0800 Subject: [PATCH 04/13] BUG: propagate metadata via calling __finalize__ in groupby.mean and groupby.median --- pandas/core/groupby/groupby.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5758762c13984..f8e0c73d11cce 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1513,11 +1513,12 @@ def mean(self, numeric_only: bool = True): 2 4.0 Name: B, dtype: float64 """ - return self._cython_agg_general( + result = self._cython_agg_general( "mean", alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only), numeric_only=numeric_only, ) + return result.__finalize__(self.obj, method="groupby") @final @Substitution(name="groupby") @@ -1539,11 +1540,12 @@ def median(self, numeric_only=True): Series or DataFrame Median of values within each group. """ - return self._cython_agg_general( + result = self._cython_agg_general( "median", alt=lambda x, axis: Series(x).median(axis=axis, numeric_only=numeric_only), numeric_only=numeric_only, ) + return result.__finalize__(self.obj, method="groupby") @final @Substitution(name="groupby") From 33fca3e99fbbcb0ab3978328f464becf2ff506ab Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Fri, 29 Jan 2021 14:32:39 -0800 Subject: [PATCH 05/13] TST: add tests for calling __finalize__ in pivot_table with multiple agg_funcs --- pandas/tests/generic/test_finalize.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 0c33add4589ed..15c51e5f3e6e4 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -154,6 +154,11 @@ ({"A": [1], "B": [1]},), operator.methodcaller("pivot_table", columns="A"), ), + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]), + ), (pd.DataFrame, frame_data, operator.methodcaller("stack")), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("explode", "A")), From 5f23bef209a1d0a554588047877f05c925f7f5bf Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Fri, 29 Jan 2021 16:01:07 -0800 Subject: [PATCH 06/13] BUG + PERF: call __finalize__ in pivot_table --- pandas/core/reshape/pivot.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7ac98d7fcbd33..9350bb4a1b987 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -58,7 +58,7 @@ def pivot_table( pieces: List[DataFrame] = [] keys = [] for func in aggfunc: - table = pivot_table( + _table = _pivot_table( data, values=values, index=index, @@ -70,10 +70,39 @@ def pivot_table( margins_name=margins_name, observed=observed, ) - pieces.append(table) + pieces.append(_table) keys.append(getattr(func, "__name__", func)) - return concat(pieces, keys=keys, axis=1) + table = concat(pieces, keys=keys, axis=1) + return table.__finalize__(data, method="pivot_table") + + table = _pivot_table( + data, + values, + index, + columns, + aggfunc, + fill_value, + margins, + dropna, + margins_name, + observed, + ) + return table.__finalize__(data, method="pivot_table") + + +def _pivot_table( + data, + values, + index, + columns, + aggfunc: str, + fill_value, + margins, + dropna, + margins_name, + observed, +) -> DataFrame: keys = index + columns From 832238509d4bff8554fc7a5d0d1237f94cccc10b Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Fri, 29 Jan 2021 16:23:49 -0800 Subject: [PATCH 07/13] DOC: update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ef904c1d7021a..14e7ae0797104 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -365,7 +365,7 @@ Reshaping - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`) - :meth:`Series.value_counts` returns keys in original order (:issue:`12679`, :issue:`11227`) - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`) -- +- Bug in :meth:`.groupby.mean`, :meth:`.groupby.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) Sparse ^^^^^^ From d0229838b9460b1961acdf67a20b2ff17c2ada21 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Fri, 29 Jan 2021 16:36:25 -0800 Subject: [PATCH 08/13] DOC: update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 14e7ae0797104..85999bd00c28a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -365,7 +365,7 @@ Reshaping - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`) - :meth:`Series.value_counts` returns keys in original order (:issue:`12679`, :issue:`11227`) - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`) -- Bug in :meth:`.groupby.mean`, :meth:`.groupby.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) Sparse ^^^^^^ From e17c31faa836c8c8ee6283501bc0d57cf4e8d14a Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Wed, 10 Feb 2021 17:47:14 -0800 Subject: [PATCH 09/13] TYP: add type hints for pandas.pivot_table --- pandas/core/reshape/pivot.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 9350bb4a1b987..abd582c59b962 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -16,7 +16,13 @@ import numpy as np -from pandas._typing import FrameOrSeriesUnion, IndexLabel +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + FrameOrSeriesUnion, + IndexLabel, +) from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -44,7 +50,7 @@ def pivot_table( values=None, index=None, columns=None, - aggfunc="mean", + aggfunc: AggFuncType = "mean", fill_value=None, margins=False, dropna=True, @@ -96,12 +102,12 @@ def _pivot_table( values, index, columns, - aggfunc: str, + aggfunc: Union[AggFuncTypeBase, AggFuncTypeDict], fill_value, - margins, - dropna, - margins_name, - observed, + margins: bool, + dropna: bool, + margins_name: str, + observed: bool, ) -> DataFrame: keys = index + columns From 5ba2824127901e5b431c4b4360d2fee6c36fcf41 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Wed, 10 Feb 2021 20:12:02 -0800 Subject: [PATCH 10/13] DOC: rename pivot_table for internal use and add docstring --- pandas/core/reshape/pivot.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index abd582c59b962..1ad98c310cca3 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -64,7 +64,7 @@ def pivot_table( pieces: List[DataFrame] = [] keys = [] for func in aggfunc: - _table = _pivot_table( + _table = __internal_pivot_table( data, values=values, index=index, @@ -82,7 +82,7 @@ def pivot_table( table = concat(pieces, keys=keys, axis=1) return table.__finalize__(data, method="pivot_table") - table = _pivot_table( + table = __internal_pivot_table( data, values, index, @@ -97,7 +97,7 @@ def pivot_table( return table.__finalize__(data, method="pivot_table") -def _pivot_table( +def __internal_pivot_table( data, values, index, @@ -109,7 +109,9 @@ def _pivot_table( margins_name: str, observed: bool, ) -> DataFrame: - + """ + Equivalent of :func:`pandas.pivot_table`, except only allowing non-list ``aggfunc``. + """ keys = index + columns values_passed = values is not None From e8b9e0b320a81d0587d341ec919e1c7a0b30b90f Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Wed, 10 Feb 2021 20:13:56 -0800 Subject: [PATCH 11/13] TYP: add type hints for pandas.pivot_table --- pandas/core/reshape/pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 1ad98c310cca3..293f3c8c0bdf6 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -46,7 +46,7 @@ @Substitution("\ndata : DataFrame") @Appender(_shared_docs["pivot_table"], indents=1) def pivot_table( - data, + data: DataFrame, values=None, index=None, columns=None, @@ -98,7 +98,7 @@ def pivot_table( def __internal_pivot_table( - data, + data: DataFrame, values, index, columns, From 73a84b791932b2f704848787c6c56c6e75c845aa Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Wed, 10 Feb 2021 20:21:16 -0800 Subject: [PATCH 12/13] DOC: update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 85999bd00c28a..931fc08efc38a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -355,6 +355,8 @@ Groupby/resample/rolling - Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`) - Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`) - Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`) +- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Reshaping ^^^^^^^^^ @@ -365,7 +367,7 @@ Reshaping - Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`) - :meth:`Series.value_counts` returns keys in original order (:issue:`12679`, :issue:`11227`) - Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`) -- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Sparse ^^^^^^ From d9d282a6e4f5504800aad3c9f5f3fe3e16884bdf Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Fri, 19 Feb 2021 16:01:23 -0800 Subject: [PATCH 13/13] update docstring --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 8be258736af92..8feb379a82ada 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -124,7 +124,7 @@ def __internal_pivot_table( observed: bool, ) -> DataFrame: """ - Equivalent of :func:`pandas.pivot_table`, except only allowing non-list ``aggfunc``. + Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``. """ keys = index + columns