Multidimensional groupby #818
@@ -7,3 +7,4 @@ Examples
    examples/quick-overview
    examples/weather-data
    examples/monthly-means
+   examples/multidimensional-coords
@@ -0,0 +1,201 @@
.. _examples.multidim:

Working with Multidimensional Coordinates
=========================================

Author: `Ryan Abernathey <http://github.org/rabernat>`__

Many datasets have *physical coordinates* which differ from their
*logical coordinates*. Xarray provides several ways to plot and analyze
such datasets.

.. code:: python

    %matplotlib inline
    import numpy as np
    import pandas as pd
    import xarray as xr
    import cartopy.crs as ccrs
    from matplotlib import pyplot as plt

    print("numpy version : ", np.__version__)
    print("pandas version : ", pd.__version__)
    print("xarray version : ", xr.version.version)

.. parsed-literal::

    ('numpy version : ', '1.11.0')
    ('pandas version : ', u'0.18.0')
    ('xarray version : ', '0.7.2-32-gf957eb8')
As an example, consider this dataset from the
`xarray-data <https://github.com/pydata/xarray-data>`__ repository.

.. code:: python

    ! curl -L -O https://github.com/pydata/xarray-data/raw/master/RASM_example_data.nc

.. code:: python

    ds = xr.open_dataset('RASM_example_data.nc')
    ds

.. parsed-literal::

    <xarray.Dataset>
    Dimensions:  (time: 36, x: 275, y: 205)
    Coordinates:
      * time     (time) datetime64[ns] 1980-09-16T12:00:00 1980-10-17 ...
        yc       (y, x) float64 16.53 16.78 17.02 17.27 17.51 17.76 18.0 18.25 ...
        xc       (y, x) float64 189.2 189.4 189.6 189.7 189.9 190.1 190.2 190.4 ...
      * x        (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
      * y        (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ...
    Data variables:
        Tair     (time, y, x) float64 nan nan nan nan nan nan nan nan nan nan ...
    Attributes:
        title: /workspace/jhamman/processed/R1002RBRxaaa01a/lnd/temp/R1002RBRxaaa01a.vic.ha.1979-09-01.nc
        institution: U.W.
        source: RACM R1002RBRxaaa01a
        output_frequency: daily
        output_mode: averaged
        convention: CF-1.4
        references: Based on the initial model of Liang et al., 1994, JGR, 99, 14,415-14,429.
        comment: Output from the Variable Infiltration Capacity (VIC) model.
        nco_openmp_thread_number: 1
        NCO: 4.3.7
        history: history deleted for brevity

In this example, the *logical coordinates* are ``x`` and ``y``, while
the *physical coordinates* are ``xc`` and ``yc``, which represent the
longitude and latitude of the data.

.. code:: python

    print(ds.xc.attrs)
    print(ds.yc.attrs)

.. parsed-literal::

    OrderedDict([(u'long_name', u'longitude of grid cell center'), (u'units', u'degrees_east'), (u'bounds', u'xv')])
    OrderedDict([(u'long_name', u'latitude of grid cell center'), (u'units', u'degrees_north'), (u'bounds', u'yv')])
Plotting
--------

Let's examine these coordinate variables by plotting them.

.. code:: python

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14, 4))
    ds.xc.plot(ax=ax1)
    ds.yc.plot(ax=ax2)

.. parsed-literal::

    <matplotlib.collections.QuadMesh at 0x118688fd0>

.. parsed-literal::

    /Users/rpa/anaconda/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
      if self._edgecolors == str('face'):

.. image:: multidimensional_coords_files/xarray_multidimensional_coords_8_2.png

Note that the variables ``xc`` (longitude) and ``yc`` (latitude) are
two-dimensional scalar fields.

If we try to plot the data variable ``Tair``, by default we get the
logical coordinates.

.. code:: python

    ds.Tair[0].plot()

.. parsed-literal::

    <matplotlib.collections.QuadMesh at 0x11b6da890>

.. image:: multidimensional_coords_files/xarray_multidimensional_coords_10_1.png

In order to visualize the data on a conventional latitude-longitude
grid, we can take advantage of xarray's ability to apply
`cartopy <http://scitools.org.uk/cartopy/index.html>`__ map projections.

.. code:: python

    plt.figure(figsize=(14, 6))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.set_global()
    ds.Tair[0].plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(),
                               x='xc', y='yc', add_colorbar=False)
    ax.coastlines()
    ax.set_ylim([0, 90]);

.. image:: multidimensional_coords_files/xarray_multidimensional_coords_12_0.png
Multidimensional Groupby
------------------------

The above example allowed us to visualize the data on a regular
latitude-longitude grid. But what if we want to do a calculation that
involves grouping over one of these physical coordinates (rather than
the logical coordinates), for example, calculating the mean temperature
at each latitude? This can be achieved using xarray's ``groupby``
function, which accepts multidimensional variables. By default,
``groupby`` will use every unique value in the variable, which is
probably not what we want. Instead, we can use the ``groupby_bins``
function to specify the output coordinates of the group.

.. code:: python

    # define two-degree wide latitude bins
    lat_bins = np.arange(0, 91, 2)
    # define a label for each bin corresponding to the central latitude
    lat_center = np.arange(1, 90, 2)
    # group according to those bins and take the mean
    Tair_lat_mean = ds.Tair.groupby_bins('yc', lat_bins, labels=lat_center).mean()
    # plot the result
    Tair_lat_mean.plot()

.. parsed-literal::

    [<matplotlib.lines.Line2D at 0x11cb92e90>]

.. image:: multidimensional_coords_files/xarray_multidimensional_coords_14_1.png

Note that the resulting coordinate for the ``groupby_bins`` operation
got the ``_bins`` suffix appended: ``yc_bins``. This helps us distinguish
it from the original multidimensional variable ``yc``.
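The documented workflow can be reproduced on a tiny synthetic array. This is a hedged sketch, not part of the PR: the ``lat`` coordinate values and bin edges below are made up for illustration, mirroring how a two-dimensional physical coordinate is binned.

```python
import numpy as np
import xarray as xr

# a 2 x 2 grid whose physical latitude coordinate is two-dimensional
da = xr.DataArray(
    [[0.0, 1.0], [2.0, 3.0]],
    coords={"lat": (("ny", "nx"), [[10.0, 10.0], [20.0, 20.0]])},
    dims=("ny", "nx"),
)

# bin by the 2-D coordinate; the result dimension gains a "_bins" suffix
binned = da.groupby_bins("lat", bins=[0, 15, 20]).mean()
print(binned.dims)    # ('lat_bins',)
print(binned.values)  # [0.5 2.5]
```

The two cells with ``lat == 10`` fall in the bin ``(0, 15]`` and the two with ``lat == 20`` in ``(15, 20]``, so the means are 0.5 and 2.5.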
Large diffs are not rendered by default.
@@ -130,7 +130,8 @@ class GroupBy(object):
     Dataset.groupby
     DataArray.groupby
     """
-    def __init__(self, obj, group, squeeze=False, grouper=None):
+    def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
+                 cut_kwargs={}):
         """Create a GroupBy object

         Parameters

@@ -145,14 +146,30 @@ def __init__(self, obj, group, squeeze=False, grouper=None):
             if the dimension is squeezed out.
         grouper : pd.Grouper, optional
             Used for grouping values along the `group` array.
+        bins : array-like, optional
+            If `bins` is specified, the groups will be discretized into the
+            specified bins by `pandas.cut`.
+        cut_kwargs : dict, optional
+            Extra keyword arguments to pass to `pandas.cut`
         """
         from .dataset import as_dataset
+        from .dataarray import DataArray

-        if group.ndim != 1:
-            # TODO: remove this limitation?
-            raise ValueError('`group` must be 1 dimensional')
         if getattr(group, 'name', None) is None:
             raise ValueError('`group` must have a name')
+        self._stacked_dim = None
+        if group.ndim != 1:
+            # try to stack the dims of the group into a single dim
+            # TODO: figure out how to exclude dimensions from the stacking
+            # (e.g. group over space dims but leave time dim intact)

[review comment] what would this look like?

+            orig_dims = group.dims
+            stacked_dim_name = 'stacked_' + '_'.join(orig_dims)
+            # the copy is necessary here, otherwise read only array raises
+            # error in pandas: https://github.com/pydata/pandas/issues/12813
+            group = group.stack(**{stacked_dim_name: orig_dims}).copy()
+            obj = obj.stack(**{stacked_dim_name: orig_dims})

[review comment] this is too easy!

+            self._stacked_dim = stacked_dim_name
+            self._unstacked_dims = orig_dims
+        if not hasattr(group, 'dims'):
+            raise ValueError("`group` must have a 'dims' attribute")
         group_dim, = group.dims
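The stacking trick in the hunk above can be seen in isolation with the public ``stack``/``unstack`` API. This is a sketch independent of the PR's internals; the dimension names are arbitrary, chosen to mirror the ``'stacked_' + '_'.join(orig_dims)`` naming scheme.

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6).reshape(2, 3), dims=("y", "x"))

# flatten the two dims into one, as the GroupBy constructor does internally
stacked = da.stack(stacked_y_x=("y", "x"))
print(stacked.dims)                  # ('stacked_y_x',)
print(stacked.sizes["stacked_y_x"])  # 6

# unstacking recovers the original two-dimensional layout
roundtrip = stacked.unstack("stacked_y_x")
print(roundtrip.dims)                # ('y', 'x')
```

Once the group is one-dimensional, the ordinary 1-D groupby machinery applies; the unstack step is what `_maybe_unstack_array` later undoes on the result.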
@@ -167,23 +184,31 @@ def __init__(self, obj, group, squeeze=False, grouper=None):
                              'dimension')
             full_index = None

+        if grouper is not None and bins is not None:
+            raise TypeError("Can't specify both `grouper` and `bins`.")
+        if bins is not None:
+            binned = pd.cut(group.values, bins, **cut_kwargs)
+            new_dim_name = group.name + '_bins'
+            group = DataArray(binned, group.coords, name=new_dim_name)
         if grouper is not None:
             # time-series resampling
             index = safe_cast_to_index(group)
             if not index.is_monotonic:
                 # TODO: sort instead of raising an error
                 raise ValueError('index must be monotonic for resampling')
             s = pd.Series(np.arange(index.size), index)
             first_items = s.groupby(grouper).first()
             if first_items.isnull().any():
                 full_index = first_items.index
                 first_items = first_items.dropna()
-            bins = first_items.values.astype(np.int64)
-            group_indices = ([slice(i, j) for i, j in zip(bins[:-1], bins[1:])] +
-                             [slice(bins[-1], None)])
+            sbins = first_items.values.astype(np.int64)
+            group_indices = ([slice(i, j) for i, j in zip(sbins[:-1], sbins[1:])] +
+                             [slice(sbins[-1], None)])

[review comment] The nature of … But I don't think you need to use this code path here. Instead try inserting something like the following just above where we set `if bins is None`:

    group = xr.DataArray(pd.cut(group.values, bins), group.coords, name=group.name)

Then I think this can go through the normal code path, which involves calling …

             unique_coord = Coordinate(group.name, first_items.index)
-        elif group.name in obj.dims:
+        elif group.name in obj.dims and bins is None:
             # assume that group already has sorted, unique values
+            # (if using bins, the group will have the same name as a dimension
+            # but different values)
             if group.dims != (group.name,):
                 raise ValueError('`group` is required to be a coordinate if '
                                  '`group.name` is a dimension in `obj`')
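The binning path above delegates to ``pandas.cut``, whose default right-closed intervals explain why the left-most bin edge is excluded from every group. A small illustration (not PR code; the values and edges are arbitrary):

```python
import numpy as np
import pandas as pd

values = np.array([0.0, 1.0, 2.0, 4.0])

# right-closed intervals by default: 0.0 lies outside (0, 1.5] and (1.5, 5]
binned = pd.cut(values, bins=[0, 1.5, 5])
print(binned[0])  # nan -- dropped from every group
print(binned[1])  # (0.0, 1.5]
print(binned[3])  # (1.5, 5.0]
```

Values that fall in no bin come back as missing and are simply absent from the resulting groups, which is the behavior the tests below rely on.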
@@ -276,6 +301,13 @@ def _maybe_restore_empty_groups(self, combined):
             combined = combined.reindex(**indexers)
         return combined

+    def _maybe_unstack_array(self, arr):
+        """This gets called if we are applying on an array with a
+        multidimensional group."""
+        if self._stacked_dim is not None and self._stacked_dim in arr.dims:
+            arr = arr.unstack(self._stacked_dim)
+        return arr
+
     def fillna(self, value):
         """Fill missing values in this object by group.

@@ -394,6 +426,12 @@ def lookup_order(dimension):
         new_order = sorted(stacked.dims, key=lookup_order)
         return stacked.transpose(*new_order)

+    def _restore_multiindex(self, combined):
+        if self._stacked_dim is not None and self._stacked_dim in combined.dims:
+            stacked_dim = self.group[self._stacked_dim]
+            combined[self._stacked_dim] = stacked_dim
+        return combined
+
     def apply(self, func, shortcut=False, **kwargs):
         """Apply a function over each array in the group and concatenate them
         together into a new array.

@@ -437,22 +475,22 @@ def apply(self, func, shortcut=False, **kwargs):
         grouped = self._iter_grouped()
         applied = (maybe_wrap_array(arr, func(arr, **kwargs)) for arr in grouped)
         combined = self._concat(applied, shortcut=shortcut)
-        result = self._maybe_restore_empty_groups(combined)
+        result = self._maybe_restore_empty_groups(
+            self._maybe_unstack_array(combined))
         return result

     def _concat(self, applied, shortcut=False):
         # peek at applied to determine which coordinate to stack over
         applied_example, applied = peek_at(applied)
         concat_dim, positions = self._infer_concat_args(applied_example)

         if shortcut:
             combined = self._concat_shortcut(applied, concat_dim, positions)
         else:
             combined = concat(applied, concat_dim)
             combined = _maybe_reorder(combined, concat_dim, positions)

         if isinstance(combined, type(self.obj)):
             combined = self._restore_dim_order(combined)
+        combined = self._restore_multiindex(combined)
         return combined

     def reduce(self, func, dim=None, axis=None, keep_attrs=False,
@@ -1304,6 +1304,65 @@ def test_groupby_first_and_last(self):
         expected = array  # should be a no-op
         self.assertDataArrayIdentical(expected, actual)

+    def make_groupby_multidim_example_array(self):
+        return DataArray([[[0, 1], [2, 3]], [[5, 10], [15, 20]]],
+                         coords={'lon': (['ny', 'nx'], [[30., 40.], [40., 50.]]),
+                                 'lat': (['ny', 'nx'], [[10., 10.], [20., 20.]])},
+                         dims=['time', 'ny', 'nx'])
+
+    def test_groupby_multidim(self):
+        array = self.make_groupby_multidim_example_array()
+        for dim, expected_sum in [
+                ('lon', DataArray([5, 28, 23], coords={'lon': [30., 40., 50.]})),
+                ('lat', DataArray([16, 40], coords={'lat': [10., 20.]}))]:
+            actual_sum = array.groupby(dim).sum()
+            self.assertDataArrayIdentical(expected_sum, actual_sum)
+
+    def test_groupby_multidim_apply(self):
+        array = self.make_groupby_multidim_example_array()
+        actual = array.groupby('lon').apply(
+            lambda x: x - x.mean(), shortcut=False)
+        expected = DataArray([[[-2.5, -6.], [-5., -8.5]],
+                              [[2.5, 3.], [8., 8.5]]],
+                             coords=array.coords, dims=array.dims)
+        self.assertDataArrayIdentical(expected, actual)
+
+    def test_groupby_bins(self):
+        array = DataArray(np.arange(4), dims='dim_0')
+        # the first value should not be part of any group ("right" binning)
+        array[0] = 99
+        # bins follow conventions for pandas.cut
+        # http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
+        bins = [0, 1.5, 5]
+        bin_coords = ['(0, 1.5]', '(1.5, 5]']
+        expected = DataArray([1, 5], dims='dim_0_bins',
+                             coords={'dim_0_bins': bin_coords})
+        # the problem with this is that it overwrites the dimensions of array!
+        # actual = array.groupby('dim_0', bins=bins).sum()

[review comment] what do you mean by these two comments? Do we need this commented out line?

[review comment] I think this is related to the …

+        actual = array.groupby_bins('dim_0', bins).apply(
+            lambda x: x.sum(), shortcut=False)
+        self.assertDataArrayIdentical(expected, actual)
+        # make sure original array dims are unchanged
+        # (would fail with shortcut=True above)
+        self.assertEqual(len(array.dim_0), 4)
+
+    def test_groupby_bins_multidim(self):
+        array = self.make_groupby_multidim_example_array()
+        bins = [0, 15, 20]
+        bin_coords = ['(0, 15]', '(15, 20]']
+        expected = DataArray([16, 40], dims='lat_bins',
+                             coords={'lat_bins': bin_coords})
+        actual = array.groupby_bins('lat', bins).apply(
+            lambda x: x.sum(), shortcut=False)
+        self.assertDataArrayIdentical(expected, actual)
+        # modify the array coordinates to be non-monotonic after unstacking
+        array['lat'].data = np.array([[10., 20.], [20., 10.]])
+        expected = DataArray([28, 28], dims='lat_bins',
+                             coords={'lat_bins': bin_coords})
+        actual = array.groupby_bins('lat', bins).apply(
+            lambda x: x.sum(), shortcut=False)
+        self.assertDataArrayIdentical(expected, actual)
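The behavior these tests pin down can be reproduced standalone. This is a sketch assuming a recent xarray: an explicit ``dim_0`` coordinate is added here (the test relies on the implicit dimension index), and bin labels may be pandas ``Interval`` objects rather than the strings used in the tests above.

```python
import numpy as np
import xarray as xr

# explicit coordinate [0, 1, 2, 3]; the data value at coordinate 0 is set
# to 99 so a wrongly-included left bin edge would be obvious in the sums
array = xr.DataArray([99, 1, 2, 3], dims="dim_0",
                     coords={"dim_0": [0, 1, 2, 3]})

result = array.groupby_bins("dim_0", bins=[0, 1.5, 5]).sum()
print(result.values)         # [1 5]
print(array.sizes["dim_0"])  # 4 -- the original array is unchanged
```

The coordinate value 0 falls outside both right-closed bins, so the 99 never contributes: bin ``(0, 1.5]`` sums only the value at coordinate 1, and ``(1.5, 5]`` sums the values at coordinates 2 and 3.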
[review comment] we'll need some tests that catch common errors such as …

     def make_rolling_example_array(self):
         times = pd.date_range('2000-01-01', freq='1D', periods=21)
         values = np.random.random((21, 4))