pandas-dev · jreback · Mar 17, 2014 · Mar 17, 2014 · Mar 17, 2014
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -1193,6 +1193,7 @@ Indexing, iteration
    GroupBy.groups
    GroupBy.indices
    GroupBy.get_group
+   Grouper
 
 Function application
 ~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -132,7 +132,7 @@ API Changes
   ``FutureWarning`` is raised  to alert that the old ``rows`` and ``cols`` arguments
   will not be supported in a future release (:issue:`5505`)
 
-- Allow specification of a more complex groupby, via ``pd.Groupby`` (:issue:`3794`)
+- Allow specification of a more complex groupby, via ``pd.Grouper`` (:issue:`3794`)
 
 - A tuple passed to ``DataFame.sort_index`` will be interpreted as the levels of
   the index, rather than requiring a list of tuple (:issue:`4370`)

diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -94,7 +94,7 @@ These are out-of-bounds selections
 
      g.nth(0, dropna='any')  # similar to old behaviour
 
-- Allow specification of a more complex groupby via ``pd.Groupby``, such as grouping
+- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping
   by a Time and a string field simultaneously. See :ref:`the docs <groupby.specify>`. (:issue:`3794`)
 
 - Local variable usage has changed in

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -141,25 +141,37 @@ def _last(x):
 
 class Grouper(object):
     """
-    A Grouper allows the user to specify a groupby instruction
+    A Grouper allows the user to specify a groupby instruction for a target object
+
+    This specification will select a column via the key parameter, or if the level and/or
+    axis parameters are given, a level of the index of the target object.
+
+    These are local specifications and will override 'global' settings, that is, the parameters
+    axis and level which are passed to the groupby itself.
 
     Parameters
     ----------
-    key : groupby key, default None
-    level : name, int level number, default None
-    freq : string / freqency object, default None
-    sort : boolean, whether to sort the resulting labels, default True
+    key : string, defaults to None
+        groupby key, which selects the grouping column of the target
+    level : name/number, defaults to None
+        the level for the target index
+    freq : string / frequency object, defaults to None
+        This will group by the specified frequency if the target selection (via key or level) is
+        a datetime-like object
+    axis : number/name of the axis, defaults to None
+    sort : boolean, defaults to False
+        whether to sort the resulting labels
 
     Returns
     -------
     A specification for a groupby instruction
 
     Examples
     --------
-    df.groupby(Group(key='A')) : syntatic sugar for df.groupby('A')
-    df.groupby(Group(key='date',freq='60s')) : specify a resample on the column 'date'
-    df.groupby(Group(level='date',freq='60s',axis=1)) :
-       specify a resample on the level 'date' on the columns axis with a frequency of 60s
+    >>> df.groupby(Grouper(key='A')) : syntactic sugar for df.groupby('A')
+    >>> df.groupby(Grouper(key='date',freq='60s')) : specify a resample on the column 'date'
+    >>> df.groupby(Grouper(level='date',freq='60s',axis=1)) :
+        specify a resample on the level 'date' on the columns axis with a frequency of 60s
 
     """
 
@@ -186,7 +198,7 @@ def __init__(self, key=None, level=None, freq=None, axis=None, sort=False):
     def ax(self):
         return self.grouper
 
-    def get_grouper(self, obj):
+    def _get_grouper(self, obj):
 
         """
         Parameters
@@ -198,10 +210,10 @@ def get_grouper(self, obj):
         a tuple of binner, grouper, obj (possibly sorted)
         """
 
-        self.set_grouper(obj)
+        self._set_grouper(obj)
         return self.binner, self.grouper, self.obj
 
-    def set_grouper(self, obj, sort=False):
+    def _set_grouper(self, obj, sort=False):
         """
         given an object and the specifcations, setup the internal grouper for this particular specification
 
@@ -252,7 +264,7 @@ def set_grouper(self, obj, sort=False):
         self.grouper = ax
         return self.grouper
 
-    def get_binner_for_grouping(self, obj):
+    def _get_binner_for_grouping(self, obj):
         raise NotImplementedError
 
     @property
@@ -1685,7 +1697,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
             elif isinstance(self.grouper, Grouper):
 
                 # get the new grouper
-                grouper = self.grouper.get_binner_for_grouping(self.obj)
+                grouper = self.grouper._get_binner_for_grouping(self.obj)
                 self.obj = self.grouper.obj
                 self.grouper = grouper
                 if self.name is None:
@@ -1795,7 +1807,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
 
     # a passed in Grouper, directly convert
     if isinstance(key, Grouper):
-        binner, grouper, obj = key.get_grouper(obj)
+        binner, grouper, obj = key._get_grouper(obj)
         return grouper, [], obj
 
     # already have a BaseGrouper, just return it

diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
@@ -76,7 +76,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean',
         super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
 
     def resample(self, obj):
-        self.set_grouper(obj, sort=True)
+        self._set_grouper(obj, sort=True)
         ax = self.grouper
 
         if isinstance(ax, DatetimeIndex):
@@ -93,7 +93,7 @@ def resample(self, obj):
                 rs = self._resample_periods()
             else:
                 obj = self.obj.to_timestamp(how=self.convention)
-                self.set_grouper(obj)
+                self._set_grouper(obj)
                 rs = self._resample_timestamps()
         elif len(ax) == 0:
             return self.obj
@@ -104,11 +104,11 @@ def resample(self, obj):
         rs_axis.name = ax.name
         return rs
 
-    def get_grouper(self, obj):
-        self.set_grouper(obj)
-        return self.get_binner_for_resample()
+    def _get_grouper(self, obj):
+        self._set_grouper(obj)
+        return self._get_binner_for_resample()
 
-    def get_binner_for_resample(self):
+    def _get_binner_for_resample(self):
         # create the BinGrouper
         # assume that self.set_grouper(obj) has already been called
 
@@ -121,12 +121,12 @@ def get_binner_for_resample(self):
         self.grouper = BinGrouper(bins, binlabels)
         return self.binner, self.grouper, self.obj
 
-    def get_binner_for_grouping(self, obj):
+    def _get_binner_for_grouping(self, obj):
         # return an ordering of the transformed group labels,
         # suitable for multi-grouping, e.g the labels for
         # the resampled intervals
-        ax = self.set_grouper(obj)
-        self.get_binner_for_resample()
+        ax = self._set_grouper(obj)
+        self._get_binner_for_resample()
 
         # create the grouper
         binner = self.binner
@@ -233,7 +233,7 @@ def _resample_timestamps(self):
         # assumes set_grouper(obj) already called
         axlabels = self.ax
 
-        self.get_binner_for_resample()
+        self._get_binner_for_resample()
         grouper = self.grouper
         binner = self.binner
         obj = self.obj

diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
@@ -1134,7 +1134,7 @@ def test_apply_iteration(self):
         df = DataFrame({'open': 1, 'close': 2}, index=ind)
         tg = TimeGrouper('M')
 
-        _, grouper, _ = tg.get_grouper(df)
+        _, grouper, _ = tg._get_grouper(df)
 
         # Errors
         grouped = df.groupby(grouper, group_keys=False)
@@ -1151,7 +1151,7 @@ def test_panel_aggregation(self):
                       minor_axis=['A', 'B', 'C', 'D'])
 
         tg = TimeGrouper('M', axis=1)
-        _, grouper, _ = tg.get_grouper(wp)
+        _, grouper, _ = tg._get_grouper(wp)
         bingrouped = wp.groupby(grouper)
         binagg = bingrouped.mean()