From 071c9fa4626b056135b35fbf898092e3e57ba4ca Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 18 Jul 2019 22:21:16 -0500
Subject: [PATCH 01/42] Rename _is_cython_func to _get_cython_func

---
 pandas/core/base.py            | 4 ++--
 pandas/core/groupby/generic.py | 7 ++++---
 pandas/core/resample.py        | 4 ++--
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9480e2e425f79..de59126463970 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -565,7 +565,7 @@ def is_any_frame():
         else:
             result = None
 
-        f = self._is_cython_func(arg)
+        f = self._get_cython_func(arg)
         if f and not args and not kwargs:
             return getattr(self, f)(), None
 
@@ -652,7 +652,7 @@ def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
                 kwargs[attr] = getattr(self, attr)
         return obj_type(obj, **kwargs)
 
-    def _is_cython_func(self, arg):
+    def _get_cython_func(self, arg):
         """
         if we define an internal function for this argument, return it
         """
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 7fd0ca94e7997..66f4794bff589 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -572,7 +572,8 @@ def _transform_general(self, func, *args, **kwargs):
     def transform(self, func, *args, **kwargs):
 
         # optimized transforms
-        func = self._is_cython_func(func) or func
+        func = self._get_cython_func(func) or func
+
         if isinstance(func, str):
             if func in base.cython_transforms:
                 # cythonized transform
@@ -852,7 +853,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs):
             if relabeling:
                 ret.columns = columns
         else:
-            cyfunc = self._is_cython_func(func_or_funcs)
+            cyfunc = self._get_cython_func(func_or_funcs)
             if cyfunc and not args and not kwargs:
                 return getattr(self, cyfunc)()
 
@@ -1004,7 +1005,7 @@ def _aggregate_named(self, func, *args, **kwargs):
     @Substitution(klass="Series", selected="A.")
     @Appender(_transform_template)
     def transform(self, func, *args, **kwargs):
-        func = self._is_cython_func(func) or func
+        func = self._get_cython_func(func) or func
 
         # if string function
         if isinstance(func, str):
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index b4a3e6ed71bf4..0f15cc54e9c67 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -1047,7 +1047,7 @@ def _downsample(self, how, **kwargs):
         **kwargs : kw args passed to how function
         """
         self._set_binner()
-        how = self._is_cython_func(how) or how
+        how = self._get_cython_func(how) or how
         ax = self.ax
         obj = self._selected_obj
 
@@ -1197,7 +1197,7 @@ def _downsample(self, how, **kwargs):
         if self.kind == "timestamp":
             return super()._downsample(how, **kwargs)
 
-        how = self._is_cython_func(how) or how
+        how = self._get_cython_func(how) or how
         ax = self.ax
 
         if is_subperiod(ax.freq, self.freq):

From 0118cc89c4841b2ad7b6208e1f4f29fe6ad6d284 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 18 Jul 2019 22:21:27 -0500
Subject: [PATCH 02/42] typo

---
 pandas/core/groupby/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 66f4794bff589..f462460034e38 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -590,7 +590,7 @@ def transform(self, func, *args, **kwargs):
 
         obj = self._obj_with_exclusions
 
-        # nuiscance columns
+        # nuisance columns
         if not result.columns.equals(obj.columns):
             return self._transform_general(func, *args, **kwargs)
 

From 2f29e4e5d76cadcf86b4d2efea56bc3c2327be35 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 18 Jul 2019 23:13:35 -0500
Subject: [PATCH 03/42] CLN: whitelist func in transform(func)

---
 pandas/core/groupby/base.py    | 64 +++++++++++++++++++++++++++++++++-
 pandas/core/groupby/generic.py | 14 +++++---
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 5c4f1fa3fbddf..97dbe890e9afe 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -98,6 +98,68 @@ def _gotitem(self, key, ndim, subset=None):
 
 dataframe_apply_whitelist = common_apply_whitelist | frozenset(["dtypes", "corrwith"])
 
-cython_transforms = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"])
+# cythonized transformations or canned "agg+broadcast", which do not
+# require postprocessing of the result by transform.
+cythonized_kernels = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"])
 
 cython_cast_blacklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"])
+
+# List of aggregation/reduction functions.
+# These map each series/column to a single value
+reduction_functions = frozenset(
+    [
+        "sum",
+        "first",
+        "mean",
+        "all",
+        "any",
+        "bfill",
+        "corr",
+        "count",
+        "cov",
+        "idxmax",
+        "idxmin",
+        "last",
+        "mad",
+        "max",
+        "median",
+        "min",
+        "ngroup",
+        "nth",
+        "nunique",
+        "prod",
+        "quantile",
+        "sem",
+        "size",
+        "skew",
+        "std",
+        "var",
+    ]
+)
+
+# List of transformation functions.
+# These map each object to a like-indexed result object
+transformation_functions = frozenset(
+    [
+        "backfill",
+        "corrwith",
+        "cumcount",
+        "cummax",
+        "cummin",
+        "cumprod",
+        "cumsum",
+        "diff",
+        "ffill",
+        "fillna",
+        "rank",
+        "pad",
+        "pct_change",
+        "shift",
+        "tshift",
+    ]
+)
+
+# Valid values  of `name` for `groupby.transform(name)`
+transform_recognized_functions = reduction_functions | transformation_functions
+transform_recognized_functions -= \
+    {"corr"} # returns multindex, exclude from transform(name) for now.
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index f462460034e38..da9f2b79482a4 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -575,8 +575,11 @@ def transform(self, func, *args, **kwargs):
         func = self._get_cython_func(func) or func
 
         if isinstance(func, str):
-            if func in base.cython_transforms:
-                # cythonized transform
+            if not (func in base.transform_recognized_functions):
+                msg = "'%s' is not a valid function name for transform(name)"
+                raise ValueError(msg % func)
+            if func in base.cythonized_kernels:
+                # cythonized transformation or canned "reduction+broadcast"
                 return getattr(self, func)(*args, **kwargs)
             else:
                 # cythonized aggregation and merge
@@ -1009,8 +1012,11 @@ def transform(self, func, *args, **kwargs):
 
         # if string function
         if isinstance(func, str):
-            if func in base.cython_transforms:
-                # cythonized transform
+            if not (func in base.transform_recognized_functions):
+                msg = "'%s' is not a valid function name for transform(name)"
+                raise ValueError(msg % func)
+            if func in base.cythonized_kernels:
+                # cythonized transform or canned "agg+broadcast"
                 return getattr(self, func)(*args, **kwargs)
             else:
                 # cythonized aggregation and merge

From 45e9f707260698c8282695cb1351b4060fef32ba Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 18 Jul 2019 23:15:24 -0500
Subject: [PATCH 04/42] comments

---
 pandas/core/groupby/generic.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index da9f2b79482a4..f7690af7f2971 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -582,7 +582,9 @@ def transform(self, func, *args, **kwargs):
                 # cythonized transformation or canned "reduction+broadcast"
                 return getattr(self, func)(*args, **kwargs)
             else:
-                # cythonized aggregation and merge
+                # If func is a reduction, we need to broadcast the
+                # result to the whole group. Compute func result
+                # and deal with possible broadcasting below.
                 result = getattr(self, func)(*args, **kwargs)
         else:
             return self._transform_general(func, *args, **kwargs)
@@ -1010,7 +1012,6 @@ def _aggregate_named(self, func, *args, **kwargs):
     def transform(self, func, *args, **kwargs):
         func = self._get_cython_func(func) or func
 
-        # if string function
         if isinstance(func, str):
             if not (func in base.transform_recognized_functions):
                 msg = "'%s' is not a valid function name for transform(name)"
@@ -1019,7 +1020,9 @@ def transform(self, func, *args, **kwargs):
                 # cythonized transform or canned "agg+broadcast"
                 return getattr(self, func)(*args, **kwargs)
             else:
-                # cythonized aggregation and merge
+                # If func is a reduction, we need to broadcast the
+                # result to the whole group. Compute func result
+                # and deal with possible broadcasting below.
                 return self._transform_fast(
                     lambda: getattr(self, func)(*args, **kwargs), func
                 )

From 0a1a0fb47b18410ebe9058ac8a54ca5acb9a1621 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 18 Jul 2019 23:24:22 -0500
Subject: [PATCH 05/42] tests

---
 pandas/tests/groupby/test_transform.py | 46 ++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 1eab3ba253f4d..f3561e4d60af1 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1001,3 +1001,49 @@ def test_ffill_not_in_axis(func, key, val):
     expected = df
 
     assert_frame_equal(result, expected)
+
+
+def test_transform_invalid_name_raises():
+    df = DataFrame(dict(a=[0, 1, 1, 2]))
+    g = df.groupby(["a", "b", "b", "c"])
+    with pytest.raises(ValueError, match="not a valid function name"):
+        g.transform("some_arbitrary_name")
+
+    # method exists on the object, but is not a valid transformation/agg
+    with pytest.raises(ValueError, match="not a valid function name"):
+        g.transform("aggregate")
+
+    # Test SeriesGroupBy
+    g = df["a"].groupby(["a", "b", "b", "c"])
+    with pytest.raises(ValueError, match="not a valid function name"):
+        g.transform("some_arbitrary_name")
+
+    # method exists on the object, but is not a valid transformation/agg
+    with pytest.raises(ValueError, match="not a valid function name"):
+        g.transform("aggregate")
+
+
+from pandas.core.groupby.base import reduction_functions
+
+
+@pytest.mark.parametrize("func", reduction_functions)
+def test_transform_agg_by_name(func):
+
+    df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))
+    g = df.groupby(np.repeat([0, 1], 3))
+
+    if func == "ngroup":  # GH#27468
+        pytest.xfail("TODO: g.transform('ngroup') doesn't work")
+    if func == "size":  # GH#27469
+        pytest.xfail("TODO: g.transform('size') doesn't work")
+    if func == "corr":
+        pytest.xfail("corr returns multiindex, excluded from transform for now")
+
+    args = {"nth": [0], "quantile": [0.5]}.get(func, [])
+
+    print(func)
+    result = g.transform(func, *args)
+    tm.assert_index_equal(result.index, df.index)
+
+    # check values replicated broadcasted across group
+    assert len(set(result.iloc[-3:, 1])) == 1

From b845034992d8985cfc33cc50822ba10a55f5f5c3 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 01:16:14 -0500
Subject: [PATCH 06/42] black

---
 pandas/core/groupby/base.py            | 5 +++--
 pandas/tests/groupby/test_transform.py | 5 ++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 97dbe890e9afe..8bb102540d3e3 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -161,5 +161,6 @@ def _gotitem(self, key, ndim, subset=None):
 
 # Valid values  of `name` for `groupby.transform(name)`
 transform_recognized_functions = reduction_functions | transformation_functions
-transform_recognized_functions -= \
-    {"corr"} # returns multindex, exclude from transform(name) for now.
+transform_recognized_functions -= {
+    "corr"
+}  # returns multindex, exclude from transform(name) for now.
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index f3561e4d60af1..17e66e860a5b7 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -18,6 +18,7 @@
     concat,
     date_range,
 )
+from pandas.core.groupby.base import reduction_functions
 from pandas.core.groupby.groupby import DataError
 from pandas.util import testing as tm
 from pandas.util.testing import assert_frame_equal, assert_series_equal
@@ -1010,6 +1011,7 @@ def test_transform_invalid_name_raises():
         g.transform("some_arbitrary_name")
 
     # method exists on the object, but is not a valid transformation/agg
+    assert hasattr(g, "aggregate")  # make sure the method exists
     with pytest.raises(ValueError, match="not a valid function name"):
         g.transform("aggregate")
 
@@ -1023,9 +1025,6 @@ def test_transform_invalid_name_raises():
         g.transform("aggregate")
 
 
-from pandas.core.groupby.base import reduction_functions
-
-
 @pytest.mark.parametrize("func", reduction_functions)
 def test_transform_agg_by_name(func):
 

From f45acd54499a2c8ca00a7c3d44ad5c0647a25529 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 09:25:57 -0500
Subject: [PATCH 07/42] Deterministic test order for pytest collection

---
 pandas/tests/groupby/test_transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 17e66e860a5b7..e8ddf32a4132a 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1025,7 +1025,7 @@ def test_transform_invalid_name_raises():
         g.transform("aggregate")
 
 
-@pytest.mark.parametrize("func", reduction_functions)
+@pytest.mark.parametrize("func", sorted(reduction_functions))
 def test_transform_agg_by_name(func):
 
     df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))

From 514528b0949238b4a5126350c146cc90afdf4380 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 18:08:30 -0500
Subject: [PATCH 08/42] use format()

---
 pandas/core/groupby/generic.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index f7690af7f2971..a34b89205b4ef 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -576,8 +576,8 @@ def transform(self, func, *args, **kwargs):
 
         if isinstance(func, str):
             if not (func in base.transform_recognized_functions):
-                msg = "'%s' is not a valid function name for transform(name)"
-                raise ValueError(msg % func)
+                msg = "'{func}' is not a valid function name for transform(name)"
+                raise ValueError(msg.format(func=func))
             if func in base.cythonized_kernels:
                 # cythonized transformation or canned "reduction+broadcast"
                 return getattr(self, func)(*args, **kwargs)
@@ -1014,8 +1014,8 @@ def transform(self, func, *args, **kwargs):
 
         if isinstance(func, str):
             if not (func in base.transform_recognized_functions):
-                msg = "'%s' is not a valid function name for transform(name)"
-                raise ValueError(msg % func)
+                msg = "'{func}' is not a valid function name for transform(name)"
+                raise ValueError(msg.format(func=func))
             if func in base.cythonized_kernels:
                 # cythonized transform or canned "agg+broadcast"
                 return getattr(self, func)(*args, **kwargs)

From d662c9bae9f4aec7441066310b78d1897f81c0e4 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 18:16:27 -0500
Subject: [PATCH 09/42] delete print func, now parameterized

---
 pandas/tests/groupby/test_transform.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index e8ddf32a4132a..e48bc245f5674 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1040,7 +1040,6 @@ def test_transform_agg_by_name(func):
 
     args = {"nth": [0], "quantile": [0.5]}.get(func, [])
 
-    print(func)
     result = g.transform(func, *args)
     tm.assert_index_equal(result.index, df.index)
 

From 3c56a62dce809a4f3d845c6a4b58161a5d3badcc Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 18:25:55 -0500
Subject: [PATCH 10/42] Make ita fixture

---
 pandas/tests/groupby/conftest.py       | 7 +++++++
 pandas/tests/groupby/test_transform.py | 6 ++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index bdf93756b7559..2ff9388b2f1e6 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -102,3 +102,10 @@ def three_group():
             "F": np.random.randn(11),
         }
     )
+
+
+from pandas.core.groupby.base import reduction_functions
+
+@pytest.fixture(params=sorted(reduction_functions))
+def reduction_func(request):
+    return request.param
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index e48bc245f5674..d2bf477428dea 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -18,7 +18,6 @@
     concat,
     date_range,
 )
-from pandas.core.groupby.base import reduction_functions
 from pandas.core.groupby.groupby import DataError
 from pandas.util import testing as tm
 from pandas.util.testing import assert_frame_equal, assert_series_equal
@@ -1025,9 +1024,8 @@ def test_transform_invalid_name_raises():
         g.transform("aggregate")
 
 
-@pytest.mark.parametrize("func", sorted(reduction_functions))
-def test_transform_agg_by_name(func):
-
+def test_transform_agg_by_name(reduction_func):
+    func = reduction_func
     df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))
     g = df.groupby(np.repeat([0, 1], 3))
 

From 52d4a618d2e712570e603e13faa60d6353271ea8 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 18:30:53 -0500
Subject: [PATCH 11/42] typing

---
 pandas/core/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index de59126463970..4e8b04f0eea6a 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -5,6 +5,7 @@
 from collections import OrderedDict
 import textwrap
 import warnings
+from typing import Optional
 
 import numpy as np
 
@@ -652,7 +653,7 @@ def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
                 kwargs[attr] = getattr(self, attr)
         return obj_type(obj, **kwargs)
 
-    def _get_cython_func(self, arg):
+    def _get_cython_func(self, arg: str) -> Optional[str]:
         """
         if we define an internal function for this argument, return it
         """

From a6a681a08e0e050119c4fa6f33f7f80784111407 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 18:31:12 -0500
Subject: [PATCH 12/42] black

---
 pandas/core/base.py              | 2 +-
 pandas/tests/groupby/conftest.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 4e8b04f0eea6a..4b20bafcdf941 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -4,8 +4,8 @@
 import builtins
 from collections import OrderedDict
 import textwrap
-import warnings
 from typing import Optional
+import warnings
 
 import numpy as np
 
diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index 2ff9388b2f1e6..a17d0b5b5793c 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -2,6 +2,7 @@
 import pytest
 
 from pandas import DataFrame, MultiIndex
+from pandas.core.groupby.base import reduction_functions
 from pandas.util import testing as tm
 
 
@@ -104,8 +105,6 @@ def three_group():
     )
 
 
-from pandas.core.groupby.base import reduction_functions
-
 @pytest.fixture(params=sorted(reduction_functions))
 def reduction_func(request):
     return request.param

From 4e3ba6353da5f38ecfa1b1b4af871488fd5eb5a5 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Fri, 19 Jul 2019 18:57:12 -0500
Subject: [PATCH 13/42] Link issue

---
 pandas/tests/groupby/test_transform.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index d2bf477428dea..ba9aba53a4ab5 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1004,6 +1004,7 @@ def test_ffill_not_in_axis(func, key, val):
 
 
 def test_transform_invalid_name_raises():
+    # GH#27486
     df = DataFrame(dict(a=[0, 1, 1, 2]))
     g = df.groupby(["a", "b", "b", "c"])
     with pytest.raises(ValueError, match="not a valid function name"):

From 0ad156b2498f53392bb9467c9de65a09f6e07613 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 09:46:31 -0500
Subject: [PATCH 14/42] sort names

---
 pandas/core/groupby/base.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 8bb102540d3e3..2cd91f627f7d1 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -108,20 +108,19 @@ def _gotitem(self, key, ndim, subset=None):
 # These map each series/column to a single value
 reduction_functions = frozenset(
     [
-        "sum",
-        "first",
-        "mean",
         "all",
         "any",
         "bfill",
         "corr",
         "count",
         "cov",
+        "first",
         "idxmax",
         "idxmin",
         "last",
         "mad",
         "max",
+        "mean",
         "median",
         "min",
         "ngroup",
@@ -133,6 +132,7 @@ def _gotitem(self, key, ndim, subset=None):
         "size",
         "skew",
         "std",
+        "sum",
         "var",
     ]
 )
@@ -151,9 +151,9 @@ def _gotitem(self, key, ndim, subset=None):
         "diff",
         "ffill",
         "fillna",
-        "rank",
         "pad",
         "pct_change",
+        "rank",
         "shift",
         "tshift",
     ]

From b1cb4b0d7e8fc9f3f82d0c44c3ac9b91a722be61 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 09:47:37 -0500
Subject: [PATCH 15/42] docstring

---
 pandas/tests/groupby/conftest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index a17d0b5b5793c..4121cb56b27e8 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -107,4 +107,6 @@ def three_group():
 
 @pytest.fixture(params=sorted(reduction_functions))
 def reduction_func(request):
+    """yields the string names of all groupby reduction functions, one at a time.
+    """
     return request.param

From d445de4791043ad56eb60392ce069ca3de1c5fa3 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 09:50:22 -0500
Subject: [PATCH 16/42] rename to use 'kernel'

---
 pandas/core/groupby/base.py    | 8 ++++----
 pandas/core/groupby/generic.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 2cd91f627f7d1..adc612d49b501 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -106,7 +106,7 @@ def _gotitem(self, key, ndim, subset=None):
 
 # List of aggregation/reduction functions.
 # These map each series/column to a single value
-reduction_functions = frozenset(
+reduction_kernels = frozenset(
     [
         "all",
         "any",
@@ -139,7 +139,7 @@ def _gotitem(self, key, ndim, subset=None):
 
 # List of transformation functions.
 # These map each object to a like-indexed result object
-transformation_functions = frozenset(
+transformation_kernels = frozenset(
     [
         "backfill",
         "corrwith",
@@ -160,7 +160,7 @@ def _gotitem(self, key, ndim, subset=None):
 )
 
 # Valid values  of `name` for `groupby.transform(name)`
-transform_recognized_functions = reduction_functions | transformation_functions
-transform_recognized_functions -= {
+transform_kernel_whitelist = reduction_kernels | transformation_kernels
+transform_kernel_whitelist -= {
     "corr"
 }  # returns multindex, exclude from transform(name) for now.
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index a34b89205b4ef..1aa15d0aa54ac 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -575,7 +575,7 @@ def transform(self, func, *args, **kwargs):
         func = self._get_cython_func(func) or func
 
         if isinstance(func, str):
-            if not (func in base.transform_recognized_functions):
+            if not (func in base.transform_kernel_whitelist):
                 msg = "'{func}' is not a valid function name for transform(name)"
                 raise ValueError(msg.format(func=func))
             if func in base.cythonized_kernels:
@@ -1013,7 +1013,7 @@ def transform(self, func, *args, **kwargs):
         func = self._get_cython_func(func) or func
 
         if isinstance(func, str):
-            if not (func in base.transform_recognized_functions):
+            if not (func in base.transform_kernel_whitelist):
                 msg = "'{func}' is not a valid function name for transform(name)"
                 raise ValueError(msg.format(func=func))
             if func in base.cythonized_kernels:

From de2c59cf44edf70bbc9e368d39edab05066fdc40 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 09:54:52 -0500
Subject: [PATCH 17/42] corr doesn't belong on the list

---
 pandas/core/groupby/base.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index adc612d49b501..f0482a31d959b 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -111,7 +111,6 @@ def _gotitem(self, key, ndim, subset=None):
         "all",
         "any",
         "bfill",
-        "corr",
         "count",
         "cov",
         "first",
@@ -161,6 +160,3 @@ def _gotitem(self, key, ndim, subset=None):
 
 # Valid values  of `name` for `groupby.transform(name)`
 transform_kernel_whitelist = reduction_kernels | transformation_kernels
-transform_kernel_whitelist -= {
-    "corr"
-}  # returns multindex, exclude from transform(name) for now.

From bd73cff65df525fb49848348be3548e6b0342a5e Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 09:57:10 -0500
Subject: [PATCH 18/42] comment

---
 pandas/core/groupby/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index f0482a31d959b..b052dcf956779 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -159,4 +159,6 @@ def _gotitem(self, key, ndim, subset=None):
 )
 
 # Valid values  of `name` for `groupby.transform(name)`
+# NOTE: do NOT edit this directly. New additions should be inserted
+# into the appropriate list above.
 transform_kernel_whitelist = reduction_kernels | transformation_kernels

From 7d7eecc74c8fd318595f2de95e351b37d4fd2f4d Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 10:01:07 -0500
Subject: [PATCH 19/42] whatsnew

---
 doc/source/whatsnew/v1.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 9caf127553e05..f7bb8f0bd1131 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -39,7 +39,7 @@ Backwards incompatible API changes
 
 .. _whatsnew_1000.api.other:
 
--
+- :meth:`GroupBy.transform(name)` now raises on invalid operation names (:issue:`27489`).
 -
 
 Other API changes

From d343686e243dac94f0d8dbe4c449c290383660b3 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 10:49:29 -0500
Subject: [PATCH 20/42] Fix name in conftest.py

---
 pandas/tests/groupby/conftest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py
index 4121cb56b27e8..72e60c5099304 100644
--- a/pandas/tests/groupby/conftest.py
+++ b/pandas/tests/groupby/conftest.py
@@ -2,7 +2,7 @@
 import pytest
 
 from pandas import DataFrame, MultiIndex
-from pandas.core.groupby.base import reduction_functions
+from pandas.core.groupby.base import reduction_kernels
 from pandas.util import testing as tm
 
 
@@ -105,7 +105,7 @@ def three_group():
     )
 
 
-@pytest.fixture(params=sorted(reduction_functions))
+@pytest.fixture(params=sorted(reduction_kernels))
 def reduction_func(request):
     """yields the string names of all groupby reduction functions, one at a time.
     """

From 6d0301a1a45bf984bff6ed5fcfec9f38656a9655 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 11:17:42 -0500
Subject: [PATCH 21/42] test SeriesGroupby

---
 pandas/tests/groupby/test_transform.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index ba9aba53a4ab5..b2a756a226aad 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1025,10 +1025,16 @@ def test_transform_invalid_name_raises():
         g.transform("aggregate")
 
 
-def test_transform_agg_by_name(reduction_func):
+@pytest.mark.parametrize(
+    "obj",
+    [
+        DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))),
+        Series([0, 0, 0, 1, 1, 1])
+    ],
+)
+def test_transform_agg_by_name(reduction_func, obj):
     func = reduction_func
-    df = DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6)))
-    g = df.groupby(np.repeat([0, 1], 3))
+    g = obj.groupby(np.repeat([0, 1], 3))
 
     if func == "ngroup":  # GH#27468
         pytest.xfail("TODO: g.transform('ngroup') doesn't work")
@@ -1036,11 +1042,13 @@ def test_transform_agg_by_name(reduction_func):
         pytest.xfail("TODO: g.transform('size') doesn't work")
     if func == "corr":
         pytest.xfail("corr returns multiindex, excluded from transform for now")
+    if func == "cov" and isinstance(obj, Series):
+        pytest.xfail("skip cov for series")
 
     args = {"nth": [0], "quantile": [0.5]}.get(func, [])
 
     result = g.transform(func, *args)
-    tm.assert_index_equal(result.index, df.index)
+    tm.assert_index_equal(result.index, obj.index)
 
     # check values replicated broadcasted across group
-    assert len(set(result.iloc[-3:, 1])) == 1
+    assert len(set(DataFrame(result).iloc[-3:, -1])) == 1

From b6e924ed1c9e51214d278b264a9b52d26a48d980 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 12:01:33 -0500
Subject: [PATCH 22/42] black

---
 pandas/tests/groupby/test_transform.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index b2a756a226aad..9d8eea6673d11 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1027,10 +1027,7 @@ def test_transform_invalid_name_raises():
 
 @pytest.mark.parametrize(
     "obj",
-    [
-        DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))),
-        Series([0, 0, 0, 1, 1, 1])
-    ],
+    [DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))), Series([0, 0, 0, 1, 1, 1])],
 )
 def test_transform_agg_by_name(reduction_func, obj):
     func = reduction_func

From 399804c301a996d994ae7c5d5cf7ff1d97fdfc49 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 22:56:02 -0500
Subject: [PATCH 23/42] remove cov

---
 pandas/core/groupby/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index b052dcf956779..421aac411586b 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -112,7 +112,6 @@ def _gotitem(self, key, ndim, subset=None):
         "any",
         "bfill",
         "count",
-        "cov",
         "first",
         "idxmax",
         "idxmin",

From 1d158deb50de05bcd1c8d4415836ec84985f322e Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 22:57:12 -0500
Subject: [PATCH 24/42] bfill is a reduction

---
 pandas/core/groupby/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 421aac411586b..64295fe359c36 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -110,7 +110,6 @@ def _gotitem(self, key, ndim, subset=None):
     [
         "all",
         "any",
-        "bfill",
         "count",
         "first",
         "idxmax",
@@ -147,6 +146,7 @@ def _gotitem(self, key, ndim, subset=None):
         "cumprod",
         "cumsum",
         "diff",
+        "bfill",
         "ffill",
         "fillna",
         "pad",

From 04dadd9573f12e4f3a33b3a3144cc4fcc4f89513 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 22:57:43 -0500
Subject: [PATCH 25/42] bfill is not a reduction I mean


From 146e402253a571a629a15b0a147a461c12b54a03 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 22:59:27 -0500
Subject: [PATCH 26/42] sort

---
 pandas/core/groupby/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 64295fe359c36..93145d680db5d 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -139,6 +139,7 @@ def _gotitem(self, key, ndim, subset=None):
 transformation_kernels = frozenset(
     [
         "backfill",
+        "bfill",
         "corrwith",
         "cumcount",
         "cummax",
@@ -146,7 +147,6 @@ def _gotitem(self, key, ndim, subset=None):
         "cumprod",
         "cumsum",
         "diff",
-        "bfill",
         "ffill",
         "fillna",
         "pad",

From 932d224706cee19106eb3a8247c76961866fd255 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 23:01:21 -0500
Subject: [PATCH 27/42] comment

---
 pandas/core/groupby/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 93145d680db5d..8d6ab8c78a808 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -136,6 +136,7 @@ def _gotitem(self, key, ndim, subset=None):
 
 # List of transformation functions.
 # These map each object to a like-indexed result object
+# like-indexed means *same* index and *same* columns.
 transformation_kernels = frozenset(
     [
         "backfill",

From 897d2fb2f21f603279e15699a18f81a6d303a874 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 23:02:43 -0500
Subject: [PATCH 28/42] comment

---
 pandas/core/groupby/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 8d6ab8c78a808..db8256e448a7a 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -135,7 +135,7 @@ def _gotitem(self, key, ndim, subset=None):
 )
 
 # List of transformation functions.
-# These map each object to a like-indexed result object
+# These map each Series/DataFrame to a like-indexed result object
 # like-indexed means *same* index and *same* columns.
 transformation_kernels = frozenset(
     [

From 6a808281c46a1e347821bc7aef8131587836cbe6 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 23:04:47 -0500
Subject: [PATCH 29/42] comment

---
 pandas/core/groupby/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index db8256e448a7a..2c9734b3b6ed6 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -135,7 +135,7 @@ def _gotitem(self, key, ndim, subset=None):
 )
 
 # List of transformation functions.
-# These map each Series/DataFrame to a like-indexed result object
+# These map each group to a like-indexed result object
 # like-indexed means *same* index and *same* columns.
 transformation_kernels = frozenset(
     [

From 255519331e88325f89b76787a23ccf7c76f22e78 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 23:25:18 -0500
Subject: [PATCH 30/42] remove stale test cases

---
 pandas/tests/groupby/test_transform.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 9d8eea6673d11..56b0bd1df52d6 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1037,10 +1037,6 @@ def test_transform_agg_by_name(reduction_func, obj):
         pytest.xfail("TODO: g.transform('ngroup') doesn't work")
     if func == "size":  # GH#27469
         pytest.xfail("TODO: g.transform('size') doesn't work")
-    if func == "corr":
-        pytest.xfail("corr returns multiindex, excluded from transform for now")
-    if func == "cov" and isinstance(obj, Series):
-        pytest.xfail("skip cov for series")
 
     args = {"nth": [0], "quantile": [0.5]}.get(func, [])
 

From 3d7bc647ac4acc878421c6edb24225bac9d1f323 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Sun, 21 Jul 2019 23:26:11 -0500
Subject: [PATCH 31/42] remove duplicate test

---
 pandas/tests/groupby/test_transform.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 56b0bd1df52d6..11919e3125bb6 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1020,10 +1020,6 @@ def test_transform_invalid_name_raises():
     with pytest.raises(ValueError, match="not a valid function name"):
         g.transform("some_arbitrary_name")
 
-    # method exists on the object, but is not a valid transformation/agg
-    with pytest.raises(ValueError, match="not a valid function name"):
-        g.transform("aggregate")
-
 
 @pytest.mark.parametrize(
     "obj",

From 053320b114a0e427128430b01d95ba7d98fa0da6 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Mon, 22 Jul 2019 19:14:09 -0500
Subject: [PATCH 32/42] CI


From 6b9041847e2cbeae44d5e2d150904ab8f052d8c9 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Tue, 23 Jul 2019 12:02:02 -0500
Subject: [PATCH 33/42] CI


From c932afd039b3ff6bcfbdbe0e0f788566c836b3f5 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Tue, 23 Jul 2019 19:36:04 -0500
Subject: [PATCH 34/42] TST: make sure every public method on Grouper is
 accounted for

---
 pandas/core/groupby/base.py            | 33 ++++++++++++++++++++++++++
 pandas/tests/groupby/test_whitelist.py | 26 ++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 2c9734b3b6ed6..26392d7029703 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -158,6 +158,39 @@ def _gotitem(self, key, ndim, subset=None):
     ]
 )
 
+# these are methods which don't belong
+# in either of the above groups
+groupby_other_methods = frozenset(
+    [
+        "agg",
+        "aggregate",
+        "apply",
+        "boxplot",
+        # corr and cove return a MultiIndex so the result is not like-index
+        # and therefore not a transformation
+        "corr",
+        "cov",
+        "describe",
+        "dtypes",
+        "expanding",
+        "filter",
+        "get_group",
+        "groups",
+        "head",
+        "hist",
+        "indices",
+        "ndim",
+        "ngroups",
+        "ohlc",
+        "pipe",
+        "plot",
+        "resample",
+        "rolling",
+        "tail",
+        "take",
+        "transform",
+    ]
+)
 # Valid values  of `name` for `groupby.transform(name)`
 # NOTE: do NOT edit this directly. New additions should be inserted
 # into the appropriate list above.
diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py
index ee380c6108c38..d96b3efddec2a 100644
--- a/pandas/tests/groupby/test_whitelist.py
+++ b/pandas/tests/groupby/test_whitelist.py
@@ -9,6 +9,11 @@
 import pytest
 
 from pandas import DataFrame, Index, MultiIndex, Series, date_range
+from pandas.core.groupby.base import (
+    groupby_other_methods,
+    reduction_kernels,
+    transformation_kernels,
+)
 from pandas.util import testing as tm
 
 AGG_FUNCTIONS = [
@@ -376,3 +381,24 @@ def test_groupby_selection_with_methods(df):
     tm.assert_frame_equal(
         g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3)
     )
+
+
+def test_all_methods_categorized(mframe):
+    grp = mframe.groupby(mframe.iloc[:, 0])
+    names = {_ for _ in dir(grp) if not _.startswith("_")} - set(mframe.columns)
+    names -= reduction_kernels
+    names -= transformation_kernels
+    names -= groupby_other_methods
+
+    # each name can appear in only one list
+    assert not (reduction_kernels & transformation_kernels)
+    assert not (reduction_kernels & groupby_other_methods)
+    assert not (transformation_kernels & groupby_other_methods)
+
+    if names:
+        # each name must appear in one of the lists under
+        # pandas.core.groupby.base:
+        # - reduction_kernels
+        # - transformation_kernels
+        # - groupby_other_methods
+        raise RuntimeError("Uncatgeorized Groupby Methods: {names}".format(names=names))

From 5f107200fe160afc6b5c7e79d4524e830c0b86a1 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Tue, 23 Jul 2019 19:37:22 -0500
Subject: [PATCH 35/42] comment

---
 pandas/core/groupby/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 26392d7029703..bc00cd45e2333 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -105,7 +105,7 @@ def _gotitem(self, key, ndim, subset=None):
 cython_cast_blacklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"])
 
 # List of aggregation/reduction functions.
-# These map each series/column to a single value
+# These map each group to a single value
 reduction_kernels = frozenset(
     [
         "all",

From 4b186a4e69ea84019271327691e16347c2643f9b Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Tue, 23 Jul 2019 19:38:09 -0500
Subject: [PATCH 36/42] comment

---
 pandas/core/groupby/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index bc00cd45e2333..b564537c86575 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -158,8 +158,8 @@ def _gotitem(self, key, ndim, subset=None):
     ]
 )
 
-# these are methods which don't belong
-# in either of the above groups
+# these are all the public methods on Grouper which don't belong
+# in either of the above lists
 groupby_other_methods = frozenset(
     [
         "agg",

From de0c11ce7d02e7383e10d37e62becae785f53126 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Wed, 24 Jul 2019 01:24:05 -0500
Subject: [PATCH 37/42] comment

---
 pandas/core/groupby/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index b564537c86575..dcf8e1aa22516 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -166,8 +166,8 @@ def _gotitem(self, key, ndim, subset=None):
         "aggregate",
         "apply",
         "boxplot",
-        # corr and cove return a MultiIndex so the result is not like-index
-        # and therefore not a transformation
+        # corr and cov return ngroups*ncolumns rows, so they
+        # are neither a transformation nor a reduction
         "corr",
         "cov",
         "describe",

From cba02d6c78e905667241f8315f14370f3ee741c2 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Wed, 24 Jul 2019 11:10:01 -0500
Subject: [PATCH 38/42] fix review

---
 doc/source/whatsnew/v1.0.0.rst         |  2 +-
 pandas/core/groupby/base.py            |  2 +-
 pandas/tests/groupby/test_transform.py |  2 +-
 pandas/tests/groupby/test_whitelist.py | 47 ++++++++++++++++++++------
 4 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index d57b86a619ac5..f6f81a059ce70 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -39,7 +39,7 @@ Backwards incompatible API changes
 
 .. _whatsnew_1000.api.other:
 
-- :meth:`GroupBy.transform(name)` now raises on invalid operation names (:issue:`27489`).
+- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`).
 -
 
 Other API changes
diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index dcf8e1aa22516..c0d165f263cef 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -105,7 +105,7 @@ def _gotitem(self, key, ndim, subset=None):
 cython_cast_blacklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"])
 
 # List of aggregation/reduction functions.
-# These map each group to a single value
+# These map each group to a single numeric value
 reduction_kernels = frozenset(
     [
         "all",
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index df63e0438eb3b..9c9730da6c279 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1039,7 +1039,7 @@ def test_transform_agg_by_name(reduction_func, obj):
     result = g.transform(func, *args)
     tm.assert_index_equal(result.index, obj.index)
 
-    # check values replicated broadcasted across group
+    # verify that values were broadcasted across each group
     assert len(set(DataFrame(result).iloc[-3:, -1])) == 1
 
 
diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py
index d96b3efddec2a..c2304ae70fe21 100644
--- a/pandas/tests/groupby/test_whitelist.py
+++ b/pandas/tests/groupby/test_whitelist.py
@@ -386,19 +386,44 @@ def test_groupby_selection_with_methods(df):
 def test_all_methods_categorized(mframe):
     grp = mframe.groupby(mframe.iloc[:, 0])
     names = {_ for _ in dir(grp) if not _.startswith("_")} - set(mframe.columns)
-    names -= reduction_kernels
-    names -= transformation_kernels
-    names -= groupby_other_methods
+    new_names = set(names)
+    new_names -= reduction_kernels
+    new_names -= transformation_kernels
+    new_names -= groupby_other_methods
 
-    # each name can appear in only one list
     assert not (reduction_kernels & transformation_kernels)
     assert not (reduction_kernels & groupby_other_methods)
     assert not (transformation_kernels & groupby_other_methods)
 
-    if names:
-        # each name must appear in one of the lists under
-        # pandas.core.groupby.base:
-        # - reduction_kernels
-        # - transformation_kernels
-        # - groupby_other_methods
-        raise RuntimeError("Uncatgeorized Groupby Methods: {names}".format(names=names))
+    # new public method?
+    if new_names:
+        msg = """
+There are uncatgeorized methods defined on the Grouper class:
+{names}.
+
+IWas a new method just added?
+
+Every public method On Grouper must appear in exactly one the
+following three lists defined in pandas.core.groupby.base:
+- `reduction_kernels`
+- `transformation_kernels`
+- `groupby_other_methods`
+see the comments in pandas/core/groupby/base.py for guidance on
+how to fix this test.
+        """
+        raise AssertionError(msg.format(names=names))
+
+    # removed a public method?
+    all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods
+    print(names)
+    print(all_categorized)
+    if not (names == all_categorized):
+        msg = """
+Some methods which are supposed to be on the Grouper class
+are missing:
+{names}.
+
+They're still defined in one of the lists that live in pandas/core/groupby/base.py.
+If you removed a method, you should update them
+"""
+        raise AssertionError(msg.format(names=all_categorized - names))

From b19a46f65bc9260d6be2b6ac569e7de861f81f10 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Wed, 24 Jul 2019 11:24:24 -0500
Subject: [PATCH 39/42] message

---
 pandas/tests/groupby/test_whitelist.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py
index c2304ae70fe21..05d745ccc0e8e 100644
--- a/pandas/tests/groupby/test_whitelist.py
+++ b/pandas/tests/groupby/test_whitelist.py
@@ -401,7 +401,7 @@ def test_all_methods_categorized(mframe):
 There are uncatgeorized methods defined on the Grouper class:
 {names}.
 
-IWas a new method just added?
+Was a new method recently added?
 
 Every public method On Grouper must appear in exactly one the
 following three lists defined in pandas.core.groupby.base:

From 3511480605208852badcb65f7a25b409e0ede643 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Wed, 24 Jul 2019 11:28:13 -0500
Subject: [PATCH 40/42] Update test

---
 pandas/tests/groupby/test_transform.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 9c9730da6c279..a28d17a79a436 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1037,7 +1037,11 @@ def test_transform_agg_by_name(reduction_func, obj):
     args = {"nth": [0], "quantile": [0.5]}.get(func, [])
 
     result = g.transform(func, *args)
+
+    # this is the *definition* of a transformation
     tm.assert_index_equal(result.index, obj.index)
+    if hasattr(obj, "columns"):
+        tm.assert_index_equal(result.columns, obj.columns)
 
     # verify that values were broadcasted across each group
     assert len(set(DataFrame(result).iloc[-3:, -1])) == 1

From b5d393102ef7decafb94564484c01d25e07863ac Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 25 Jul 2019 09:05:21 -0500
Subject: [PATCH 41/42] changes

---
 pandas/core/groupby/base.py            | 6 ++++--
 pandas/core/groupby/groupby.py         | 2 +-
 pandas/tests/groupby/test_transform.py | 7 ++++++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index c0d165f263cef..679faa054cc6b 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -124,6 +124,8 @@ def _gotitem(self, key, ndim, subset=None):
         "nth",
         "nunique",
         "prod",
+        # as long as `quantile`'s signature accepts only
+        # a single quantile value, it's a reduction.
         "quantile",
         "sem",
         "size",
@@ -135,8 +137,8 @@ def _gotitem(self, key, ndim, subset=None):
 )
 
 # List of transformation functions.
-# These map each group to a like-indexed result object
-# like-indexed means *same* index and *same* columns.
+# a transformation is a function that, for each group,
+# produces a result that has the same shape as the group.
 transformation_kernels = frozenset(
     [
         "backfill",
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 9aba9723e0546..3d4dbd3f8d887 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -261,7 +261,7 @@ class providing the base-class of operations.
 
 * f must return a value that either has the same shape as the input
   subframe or can be broadcast to the shape of the input subframe.
-  For example, f returns a scalar it will be broadcast to have the
+  For example, if `f` returns a scalar it will be broadcast to have the
   same shape as the input subframe.
 * if this is a DataFrame, f must support application column-by-column
   in the subframe. If f also supports application to the entire subframe,
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index a28d17a79a436..d3972e6ba9008 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -1023,7 +1023,12 @@ def test_transform_invalid_name_raises():
 
 @pytest.mark.parametrize(
     "obj",
-    [DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))), Series([0, 0, 0, 1, 1, 1])],
+    [
+        DataFrame(
+            dict(a=[0, 0, 0, 1, 1, 1], b=range(6)), index=["A", "B", "C", "D", "E", "F"]
+        ),
+        Series([0, 0, 0, 1, 1, 1], index=["A", "B", "C", "D", "E", "F"]),
+    ],
 )
 def test_transform_agg_by_name(reduction_func, obj):
     func = reduction_func

From e9343e1c1506a57a55a7561bc61850b36daea1d9 Mon Sep 17 00:00:00 2001
From: pilkibun <pilkibun@users.noreply.github.com>
Date: Thu, 25 Jul 2019 13:17:58 -0500
Subject: [PATCH 42/42] reference issue

---
 pandas/core/groupby/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 679faa054cc6b..fc3bb69afd0cb 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -126,6 +126,7 @@ def _gotitem(self, key, ndim, subset=None):
         "prod",
         # as long as `quantile`'s signature accepts only
         # a single quantile value, it's a reduction.
+        # GH#27526 might change that.
         "quantile",
         "sem",
         "size",