From b147e87e709597bf82f0ab29a387c298325a5e25 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Mon, 1 Apr 2019 21:16:16 -0400 Subject: [PATCH 01/10] PERF: Fix performance regression with Series statistical ops (#25952) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/groupby/grouper.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1ef05ae5f9c6b..67659380fef5f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -246,6 +246,7 @@ Performance Improvements - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) +- Fixed performance regression in :class:`Series` statistical operations (:issue:`25952`) .. 
_whatsnew_0250.bug_fixes: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d02775cd4b328..8a3a6aae7c9fd 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -520,21 +520,21 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, any_arraylike = any(isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys) - try: - if isinstance(obj, DataFrame): - all_in_columns_index = all(g in obj.columns or g in obj.index.names - for g in keys) - elif isinstance(obj, Series): - all_in_columns_index = all(g in obj.index.names for g in keys) - else: + if (not any_callable and not any_arraylike and not any_groupers and + match_axis_length and level is None): + try: + if isinstance(obj, DataFrame): + all_in_columns_index = all(g in obj.columns or g in + obj.index.names for g in keys) + elif isinstance(obj, Series): + all_in_columns_index = all(g in obj.index.names for g in keys) + else: + all_in_columns_index = False + except Exception: all_in_columns_index = False - except Exception: - all_in_columns_index = False - if (not any_callable and not all_in_columns_index and - not any_arraylike and not any_groupers and - match_axis_length and level is None): - keys = [com.asarray_tuplesafe(keys)] + if not all_in_columns_index: + keys = [com.asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: From 98abd41342ca45b23faf12d65adf02d6877ad2bc Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Mon, 1 Apr 2019 21:21:26 -0400 Subject: [PATCH 02/10] clarify whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 67659380fef5f..596dfb301375f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -246,7 +246,7 @@ Performance Improvements - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) - 
Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) -- Fixed performance regression in :class:`Series` statistical operations (:issue:`25952`) +- Fixed performance regression with :class:`MultiIndex` style :class:`Series` statistical operations (:issue:`25952`) .. _whatsnew_0250.bug_fixes: From 10bc0925168ffb3d2eff87fb2ccea554c81651f9 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Tue, 2 Apr 2019 07:37:36 -0400 Subject: [PATCH 03/10] removed whatsnew --- doc/source/whatsnew/v0.25.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 596dfb301375f..1ef05ae5f9c6b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -246,7 +246,6 @@ Performance Improvements - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) -- Fixed performance regression with :class:`MultiIndex` style :class:`Series` statistical operations (:issue:`25952`) .. 
_whatsnew_0250.bug_fixes: From ac2f9f246eef27f0b8c502ebd2dd83bc6f0046e8 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Thu, 4 Apr 2019 21:09:07 -0400 Subject: [PATCH 04/10] removing unnecessary code --- pandas/core/groupby/grouper.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 8a3a6aae7c9fd..184e83072c9be 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -522,16 +522,11 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if (not any_callable and not any_arraylike and not any_groupers and match_axis_length and level is None): - try: - if isinstance(obj, DataFrame): - all_in_columns_index = all(g in obj.columns or g in - obj.index.names for g in keys) - elif isinstance(obj, Series): - all_in_columns_index = all(g in obj.index.names for g in keys) - else: - all_in_columns_index = False - except Exception: - all_in_columns_index = False + if isinstance(obj, DataFrame): + all_in_columns_index = all(g in obj.columns or g in + obj.index.names for g in keys) + elif isinstance(obj, Series): + all_in_columns_index = all(g in obj.index.names for g in keys) if not all_in_columns_index: keys = [com.asarray_tuplesafe(keys)] From 1957a227ddd825a736ceb3fa28b45a2c2d980f61 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Thu, 4 Apr 2019 21:09:44 -0400 Subject: [PATCH 05/10] fix spacing --- pandas/core/groupby/grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 184e83072c9be..4ca40e5108ad1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -524,7 +524,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, match_axis_length and level is None): if isinstance(obj, DataFrame): all_in_columns_index = all(g in obj.columns or g in - obj.index.names for g in keys) + obj.index.names for g 
in keys) elif isinstance(obj, Series): all_in_columns_index = all(g in obj.index.names for g in keys) From ce89e16c82d2c9369a16aa20195369610ff41a09 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Thu, 4 Apr 2019 22:35:15 -0400 Subject: [PATCH 06/10] force rebuild --- pandas/core/groupby/grouper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 4ca40e5108ad1..be445ffe16e6c 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -531,6 +531,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not all_in_columns_index: keys = [com.asarray_tuplesafe(keys)] + if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) From 7020852c665a46aabffb8a6be14f46a33064ecfd Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Thu, 4 Apr 2019 22:35:28 -0400 Subject: [PATCH 07/10] force rebuild --- pandas/core/groupby/grouper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index be445ffe16e6c..4ca40e5108ad1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -531,7 +531,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not all_in_columns_index: keys = [com.asarray_tuplesafe(keys)] - if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) From d4620020df32a2414645cccb51bad3147bc39838 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Fri, 5 Apr 2019 07:34:50 -0400 Subject: [PATCH 08/10] force rebuild --- pandas/core/groupby/grouper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 4ca40e5108ad1..be445ffe16e6c 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -531,6 +531,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not all_in_columns_index: keys =
[com.asarray_tuplesafe(keys)] + if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) From 0f42f2e69ea662c46b4dbf7a2bf72ac7e0c15d6c Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Fri, 5 Apr 2019 07:35:02 -0400 Subject: [PATCH 09/10] force rebuild --- pandas/core/groupby/grouper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index be445ffe16e6c..4ca40e5108ad1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -531,7 +531,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not all_in_columns_index: keys = [com.asarray_tuplesafe(keys)] - if isinstance(level, (tuple, list)): if key is None: keys = [None] * len(level) From 92858c1c497dc2b5dff8a889da7093674e7a6a8a Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Sun, 28 Apr 2019 16:04:26 -0400 Subject: [PATCH 10/10] added comment --- pandas/core/groupby/grouper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 992d8fd959282..4cc7329aca322 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -522,6 +522,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, any_arraylike = any(isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys) + # is this an index replacement? if (not any_callable and not any_arraylike and not any_groupers and match_axis_length and level is None): if isinstance(obj, DataFrame):