Skip to content

DOC/API: pd.Grouper docs / api #6655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,7 @@ Indexing, iteration
GroupBy.groups
GroupBy.indices
GroupBy.get_group
Grouper

Function application
~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ API Changes
``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments
will not be supported in a future release (:issue:`5505`)

- Allow specification of a more complex groupby, via ``pd.Groupby`` (:issue:`3794`)
- Allow specification of a more complex groupby, via ``pd.Grouper`` (:issue:`3794`)

- A tuple passed to ``DataFrame.sort_index`` will be interpreted as the levels of
the index, rather than requiring a list of tuple (:issue:`4370`)
Expand Down
2 changes: 1 addition & 1 deletion doc/source/v0.14.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ These are out-of-bounds selections

g.nth(0, dropna='any') # similar to old behaviour

- Allow specification of a more complex groupby via ``pd.Groupby``, such as grouping
- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping
by a Time and a string field simultaneously. See :ref:`the docs <groupby.specify>`. (:issue:`3794`)

- Local variable usage has changed in
Expand Down
42 changes: 27 additions & 15 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,25 +141,37 @@ def _last(x):

class Grouper(object):
"""
A Grouper allows the user to specify a groupby instruction
A Grouper allows the user to specify a groupby instruction for a target object

This specification will select a column via the key parameter, or if the level and/or
axis parameters are given, a level of the index of the target object.

These are local specifications and will override 'global' settings, that is the parameters
axis and level which are passed to the groupby itself.

Parameters
----------
key : groupby key, default None
level : name, int level number, default None
freq : string / freqency object, default None
sort : boolean, whether to sort the resulting labels, default True
key : string, defaults to None
groupby key, which selects the grouping column of the target
level : name/number, defaults to None
the level for the target index
freq : string / frequency object, defaults to None
This will groupby the specified frequency if the target selection (via key or level) is
a datetime-like object
axis : number/name of the axis, defaults to None
sort : boolean, defaults to False
whether to sort the resulting labels

Returns
-------
A specification for a groupby instruction

Examples
--------
df.groupby(Group(key='A')) : syntatic sugar for df.groupby('A')
df.groupby(Group(key='date',freq='60s')) : specify a resample on the column 'date'
df.groupby(Group(level='date',freq='60s',axis=1)) :
specify a resample on the level 'date' on the columns axis with a frequency of 60s
>>> df.groupby(Grouper(key='A')) : syntactic sugar for df.groupby('A')
>>> df.groupby(Grouper(key='date',freq='60s')) : specify a resample on the column 'date'
>>> df.groupby(Grouper(level='date',freq='60s',axis=1)) :
specify a resample on the level 'date' on the columns axis with a frequency of 60s

"""

Expand All @@ -186,7 +198,7 @@ def __init__(self, key=None, level=None, freq=None, axis=None, sort=False):
def ax(self):
return self.grouper

def get_grouper(self, obj):
def _get_grouper(self, obj):

"""
Parameters
Expand All @@ -198,10 +210,10 @@ def get_grouper(self, obj):
a tuple of binner, grouper, obj (possibly sorted)
"""

self.set_grouper(obj)
self._set_grouper(obj)
return self.binner, self.grouper, self.obj

def set_grouper(self, obj, sort=False):
def _set_grouper(self, obj, sort=False):
"""
given an object and the specifications, set up the internal grouper for this particular specification

Expand Down Expand Up @@ -252,7 +264,7 @@ def set_grouper(self, obj, sort=False):
self.grouper = ax
return self.grouper

def get_binner_for_grouping(self, obj):
def _get_binner_for_grouping(self, obj):
raise NotImplementedError

@property
Expand Down Expand Up @@ -1685,7 +1697,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
elif isinstance(self.grouper, Grouper):

# get the new grouper
grouper = self.grouper.get_binner_for_grouping(self.obj)
grouper = self.grouper._get_binner_for_grouping(self.obj)
self.obj = self.grouper.obj
self.grouper = grouper
if self.name is None:
Expand Down Expand Up @@ -1795,7 +1807,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):

# a passed in Grouper, directly convert
if isinstance(key, Grouper):
binner, grouper, obj = key.get_grouper(obj)
binner, grouper, obj = key._get_grouper(obj)
return grouper, [], obj

# already have a BaseGrouper, just return it
Expand Down
20 changes: 10 additions & 10 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean',
super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)

def resample(self, obj):
self.set_grouper(obj, sort=True)
self._set_grouper(obj, sort=True)
ax = self.grouper

if isinstance(ax, DatetimeIndex):
Expand All @@ -93,7 +93,7 @@ def resample(self, obj):
rs = self._resample_periods()
else:
obj = self.obj.to_timestamp(how=self.convention)
self.set_grouper(obj)
self._set_grouper(obj)
rs = self._resample_timestamps()
elif len(ax) == 0:
return self.obj
Expand All @@ -104,11 +104,11 @@ def resample(self, obj):
rs_axis.name = ax.name
return rs

def get_grouper(self, obj):
self.set_grouper(obj)
return self.get_binner_for_resample()
def _get_grouper(self, obj):
self._set_grouper(obj)
return self._get_binner_for_resample()

def get_binner_for_resample(self):
def _get_binner_for_resample(self):
# create the BinGrouper
# assume that self._set_grouper(obj) has already been called

Expand All @@ -121,12 +121,12 @@ def get_binner_for_resample(self):
self.grouper = BinGrouper(bins, binlabels)
return self.binner, self.grouper, self.obj

def get_binner_for_grouping(self, obj):
def _get_binner_for_grouping(self, obj):
# return an ordering of the transformed group labels,
# suitable for multi-grouping, e.g the labels for
# the resampled intervals
ax = self.set_grouper(obj)
self.get_binner_for_resample()
ax = self._set_grouper(obj)
self._get_binner_for_resample()

# create the grouper
binner = self.binner
Expand Down Expand Up @@ -233,7 +233,7 @@ def _resample_timestamps(self):
# assumes _set_grouper(obj) already called
axlabels = self.ax

self.get_binner_for_resample()
self._get_binner_for_resample()
grouper = self.grouper
binner = self.binner
obj = self.obj
Expand Down
4 changes: 2 additions & 2 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ def test_apply_iteration(self):
df = DataFrame({'open': 1, 'close': 2}, index=ind)
tg = TimeGrouper('M')

_, grouper, _ = tg.get_grouper(df)
_, grouper, _ = tg._get_grouper(df)

# Errors
grouped = df.groupby(grouper, group_keys=False)
Expand All @@ -1151,7 +1151,7 @@ def test_panel_aggregation(self):
minor_axis=['A', 'B', 'C', 'D'])

tg = TimeGrouper('M', axis=1)
_, grouper, _ = tg.get_grouper(wp)
_, grouper, _ = tg._get_grouper(wp)
bingrouped = wp.groupby(grouper)
binagg = bingrouped.mean()

Expand Down