From a6cb3f32b6f57d8e6c54514f682bb9aca6669140 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 24 Sep 2019 08:38:02 -0600 Subject: [PATCH 01/25] Fix groupby reduce for DataArray --- xarray/core/groupby.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3399b27b3e7..7daf95faa79 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -765,9 +765,15 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. """ + if dim is None: + dim = self._group_dim + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) + if dim not in peek_at(self._iter_grouped())[0]: + raise ValueError("Attempting to reduce over grouped dimension %r." % dim) + def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) @@ -863,6 +869,9 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) + if dim not in peek_at(self._iter_grouped())[0]: + raise ValueError("Attempting to reduce over grouped dimension %r" % dim) + return self.apply(reduce_dataset) def assign(self, **kwargs): From f320e94161307c865d7a1f3f721b2fe892f5947f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 24 Sep 2019 08:44:27 -0600 Subject: [PATCH 02/25] bugfix. --- xarray/core/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7daf95faa79..64686210243 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -771,7 +771,7 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim not in peek_at(self._iter_grouped())[0]: + if dim not in peek_at(self._iter_grouped())[0].dims: raise ValueError("Attempting to reduce over grouped dimension %r." % dim) def reduce_array(ar): @@ -869,7 +869,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim not in peek_at(self._iter_grouped())[0]: + if dim not in peek_at(self._iter_grouped())[0].dims: raise ValueError("Attempting to reduce over grouped dimension %r" % dim) return self.apply(reduce_dataset) From c4ea74250be38c363b1adb9e4dbc2def167ed79f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 24 Sep 2019 08:54:28 -0600 Subject: [PATCH 03/25] another bugfix. --- xarray/core/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 64686210243..e56f7b1a175 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -7,7 +7,7 @@ from . import dtypes, duck_array_ops, nputils, ops from .arithmetic import SupportsArithmetic -from .common import ImplementsArrayReduce, ImplementsDatasetReduce +from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat from .formatting import format_array_flat from .options import _get_keep_attrs @@ -771,7 +771,7 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim not in peek_at(self._iter_grouped())[0].dims: + if dim is not ALL_DIMS and dim not in peek_at(self._iter_grouped())[0].dims: raise ValueError("Attempting to reduce over grouped dimension %r." % dim) def reduce_array(ar): @@ -869,7 +869,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim not in peek_at(self._iter_grouped())[0].dims: + if dim is not ALL_DIMS and dim not in peek_at(self._iter_grouped())[0].dims: raise ValueError("Attempting to reduce over grouped dimension %r" % dim) return self.apply(reduce_dataset) From f6d396a4b68dd4c13d6fa33ed5ef1c2f6194b0ff Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 08:53:00 -0600 Subject: [PATCH 04/25] bugfix unique_and_monotonic for object indexes (uniqueness is enough) --- xarray/core/groupby.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e56f7b1a175..b7d62b74dba 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -191,7 +191,10 @@ def _unique_and_monotonic(group): return True else: index = safe_cast_to_index(group) - return index.is_unique and index.is_monotonic + if index.dtype == "O": + return index.is_unique + else: + return index.is_unique and index.is_monotonic def _apply_loffset(grouper, result): From b202ad5c5f32b70d828e2522f2ddb8d8538c6010 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 08:53:35 -0600 Subject: [PATCH 05/25] Add groupby.dims property. --- xarray/core/groupby.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b7d62b74dba..3f2ce382cbc 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -251,6 +251,7 @@ class GroupBy(SupportsArithmetic): "_restore_coord_dims", "_stacked_dim", "_unique_coord", + "dims", ) def __init__( @@ -383,6 +384,9 @@ def __init__( # cached attributes self._groups = None + example = obj.isel(**{group_dim: group_indices[0]}) + self.dims = example.dims + @property def groups(self): # provided to mimic pandas.groupby @@ -774,8 +778,11 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if dim is not ALL_DIMS and dim not in peek_at(self._iter_grouped())[0].dims: - raise ValueError("Attempting to reduce over grouped dimension %r." % dim) + if dim is not ALL_DIMS and dim not in self.dims: + raise ValueError( + "cannot reduce over dimension %r. expected either xarray.ALL_DIMS to reduce over all dimensions or one or more of %r." + % (dim, self.dims) + ) def reduce_array(ar): return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) @@ -872,8 +879,11 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): def reduce_dataset(ds): return ds.reduce(func, dim, keep_attrs, **kwargs) - if dim is not ALL_DIMS and dim not in peek_at(self._iter_grouped())[0].dims: - raise ValueError("Attempting to reduce over grouped dimension %r" % dim) + if dim is not ALL_DIMS and dim not in self.dims: + raise ValueError( + "cannot reduce over dimension %r. expected either xarray.ALL_DIMS to reduce over all dimensions or one or more of %r." + % (dim, self.dims) + ) return self.apply(reduce_dataset) From 75a52be1807168e66b0234fcbfa0709788b8d7d9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 08:54:59 -0600 Subject: [PATCH 06/25] update reduce docstring to point to xarray.ALL_DIMS --- xarray/core/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3f2ce382cbc..b6a14e67396 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -696,7 +696,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): q : float in range of [0,1] (or sequence of floats) Quantile to compute, which must be between 0 and 1 inclusive. - dim : str or sequence of str, optional + dim : xarray.ALL_DIMS, str or sequence of str, optional Dimension(s) over which to apply quantile. Defaults to the grouped dimension. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} @@ -753,7 +753,7 @@ def reduce( Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. - dim : str or sequence of str, optional + dim : xarray.ALL_DIMS, str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dimension' @@ -851,7 +851,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Function which can be called in the form `func(x, axis=axis, **kwargs)` to return the result of collapsing an np.ndarray over an integer valued axis. - dim : str or sequence of str, optional + dim : xarray.ALL_DIMS, str or sequence of str, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to apply `func`. Only one of the 'dimension' From c0174810229f7d9d730453d504867e3935a5a5dd Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:09:08 -0600 Subject: [PATCH 07/25] test for object index dims. --- xarray/tests/test_groupby.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 463a6a32185..078d5019f5d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -21,6 +21,19 @@ def test_consolidate_slices(): _consolidate_slices([slice(3), 4]) +def test_groupby_dims_property(): + ds = xr.Dataset( + {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, + {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, + ) + + assert ds.groupby("x").dims == ds.sel(x="a").dims + assert ds.groupby("y").dims == ds.sel(y=1).dims + + stacked = ds.stack({"xy": ("x", "y")}) + assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims + + def test_multi_index_groupby_apply(): # regression test for GH873 ds = xr.Dataset( From d1f47d54ca1272ee969a3a3cc3e7a82d233fd382 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:19:58 -0600 Subject: [PATCH 08/25] test reduce dimensions error. --- xarray/tests/test_dataarray.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 717025afb23..ea13d7c86fe 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2582,6 +2582,15 @@ def change_metadata(x): expected = change_metadata(expected) assert_equal(expected, actual) + def test_groupby_reduce_dimension_error(self): + array = self.make_groupby_example_array() + grouped = array.groupby("y") + with raises_regex(ValueError, "cannot reduce over dimension 'y'"): + grouped.mean() + + grouped = array.groupby("y", squeeze=False) + assert_identical(array, grouped.mean()) + def test_groupby_math(self): array = self.make_groupby_example_array() for squeeze in [True, False]: From f2093e2779f8420f8b62888f6697801d57deee91 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:22:24 -0600 Subject: [PATCH 09/25] Add whats-new --- doc/whats-new.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7103d7b8ab3..3de2dbfa104 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,13 +24,17 @@ New functions/methods Enhancements ~~~~~~~~~~~~ -- Add a repr for :py:class:`~xarray.core.GroupBy` objects. By `Deepak Cherian `_. - Example:: +- :py:class:`~xarray.core.GroupBy` enhancements. By `Deepak Cherian `_. + + - Added a repr. Example:: >>> da.groupby("time.season") DataArrayGroupBy, grouped over 'season' 4 groups with labels 'DJF', 'JJA', 'MAM', 'SON' + - Added a ``GroupBy.dims`` property that mirrors the dimensions + of each group. + Bug fixes ~~~~~~~~~ - Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been @@ -40,6 +44,9 @@ Bug fixes - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays. (:issue:`3334`). By `Tom Nicholas `_. +- Fix deprecation of default reduction dimension for :py:class:`DataArray.GroupBy` objects. + (:issue:`3337`). Also make sure that dimensions with ``dtype='object'`` indexes are treated + the same as other indexes. By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ From 0baa390f6e2e23469169ac88f73271c7e8bbfff2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:29:14 -0600 Subject: [PATCH 10/25] fix docs build --- doc/groupby.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/groupby.rst b/doc/groupby.rst index 03c0881d836..0f06cdabbf9 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -215,4 +215,4 @@ applying your function, and then unstacking the result: .. ipython:: python stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum().unstack('gridcell') + stacked.groupby('gridcell').sum(xr.ALL_DIMS).unstack('gridcell') From 33715a2d06f8f10eeb87d5a2173d423b89e912fd Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:32:32 -0600 Subject: [PATCH 11/25] sq whats-new --- doc/whats-new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3de2dbfa104..8c2a9f3aa7a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,9 +44,9 @@ Bug fixes - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays. (:issue:`3334`). By `Tom Nicholas `_. -- Fix deprecation of default reduction dimension for :py:class:`DataArray.GroupBy` objects. - (:issue:`3337`). Also make sure that dimensions with ``dtype='object'`` indexes are treated - the same as other indexes. By `Deepak Cherian `_. +- Fix deprecation of default reduction dimension for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects. + (:issue:`3337`). Also make sure that grouping dimensions with ``dtype='object'`` indexes + is squeezed just as for other indexes. By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ From 4b13b5f89054753095161633df50a6c247a244e5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 4 Oct 2019 09:37:59 -0600 Subject: [PATCH 12/25] one more test. --- xarray/tests/test_groupby.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 078d5019f5d..4ece9ac29e3 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -30,6 +30,9 @@ def test_groupby_dims_property(): assert ds.groupby("x").dims == ds.sel(x="a").dims assert ds.groupby("y").dims == ds.sel(y=1).dims + assert ds.groupby("x", squeeze=False).dims == ds.isel(x=[1]).dims + assert ds.groupby("y", squeeze=False).dims == ds.isel(y=[1]).dims + stacked = ds.stack({"xy": ("x", "y")}) assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims From 8f257b1248c75c7468cef40eb4dbc0e75f8ca0c8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 7 Oct 2019 16:25:48 -0600 Subject: [PATCH 13/25] fix test. --- xarray/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 4ece9ac29e3..9943c3592f4 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -24,7 +24,7 @@ def test_consolidate_slices(): def test_groupby_dims_property(): ds = xr.Dataset( {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, - {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, + {"x": ["a", "sdb", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, ) assert ds.groupby("x").dims == ds.sel(x="a").dims From 94a8688cc30617dab86f2f032960b215548170ef Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 08:08:12 -0600 Subject: [PATCH 14/25] undo monotonic change. --- xarray/core/groupby.py | 5 +---- xarray/tests/test_groupby.py | 9 +++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b6a14e67396..7bac0eadd91 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -191,10 +191,7 @@ def _unique_and_monotonic(group): return True else: index = safe_cast_to_index(group) - if index.dtype == "O": - return index.is_unique - else: - return index.is_unique and index.is_monotonic + return index.is_unique and index.is_monotonic def _apply_loffset(grouper, result): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 9943c3592f4..b78992df470 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -24,14 +24,11 @@ def test_consolidate_slices(): def test_groupby_dims_property(): ds = xr.Dataset( {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, - {"x": ["a", "sdb", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, + {"x": ["a", "bcd", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, ) - assert ds.groupby("x").dims == ds.sel(x="a").dims - assert ds.groupby("y").dims == ds.sel(y=1).dims - - assert ds.groupby("x", squeeze=False).dims == ds.isel(x=[1]).dims - assert ds.groupby("y", squeeze=False).dims == ds.isel(y=[1]).dims + assert ds.groupby("x").dims == ds.isel(x=1).dims + assert ds.groupby("y").dims == ds.isel(y=1).dims stacked = ds.stack({"xy": ("x", "y")}) assert stacked.groupby("xy").dims == stacked.isel(xy=0).dims From 7c7c1198afa041165f9c2b82352fb9736857743f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 08:10:47 -0600 Subject: [PATCH 15/25] Add dimensions to repr. --- xarray/core/groupby.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7bac0eadd91..90ac9fa74d2 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -398,11 +398,15 @@ def __iter__(self): return zip(self._unique_coord.values, self._iter_grouped()) def __repr__(self): - return "%s, grouped over %r \n%r groups with labels %s" % ( - self.__class__.__name__, - self._unique_coord.name, - self._unique_coord.size, - ", ".join(format_array_flat(self._unique_coord, 30).split()), + return ( + "%s, grouped over %r \n%r groups with labels %s. \nEach group has dimensions: %r" + % ( + self.__class__.__name__, + self._unique_coord.name, + self._unique_coord.size, + ", ".join(format_array_flat(self._unique_coord, 30).split()), + list(self.dims), + ) ) def _get_index_and_items(self, index, grouper): From 88de8ae09048410956c102bae8b5ddfe308d845c Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 09:01:24 -0600 Subject: [PATCH 16/25] Raise error if no bins. --- xarray/core/groupby.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 90ac9fa74d2..45bf3cb4e0d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -381,6 +381,9 @@ def __init__( # cached attributes self._groups = None + if not group_indices and bins is not None: + raise ValueError("None of the data falls within bins with edges %r" % bins) + example = obj.isel(**{group_dim: group_indices[0]}) self.dims = example.dims From 2ee3bc2205aaa06481f5e8dfee79ac5d91c59cda Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 09:06:37 -0600 Subject: [PATCH 17/25] Raise nice error if no groups were formed. --- xarray/core/groupby.py | 9 +++++++-- xarray/tests/test_groupby.py | 11 ++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 45bf3cb4e0d..6c33040a84d 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -381,8 +381,13 @@ def __init__( # cached attributes self._groups = None - if not group_indices and bins is not None: - raise ValueError("None of the data falls within bins with edges %r" % bins) + if not group_indices: + if bins is not None: + raise ValueError( + "None of the data falls within bins with edges %r" % bins + ) + else: + raise ValueError("Failed to group data.") example = obj.isel(**{group_dim: group_indices[0]}) self.dims = example.dims diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b78992df470..ed328db2837 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -5,7 +5,7 @@ import xarray as xr from xarray.core.groupby import _consolidate_slices -from . import assert_identical +from . import assert_identical, raises_regex def test_consolidate_slices(): @@ -255,4 +255,13 @@ def test_groupby_repr_datetime(obj): assert actual == expected +def test_groupby_no_groups(): + dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) + with raises_regex(ValueError, "None of the data falls within bins with edges"): + dataset.groupby_bins("x", bins=[0.1, 0.2, 0.3]) + + with raises_regex(ValueError, "None of the data falls within bins with edges"): + dataset.to_array().groupby_bins("x", bins=[0.1, 0.2, 0.3]) + + # TODO: move other groupby tests from test_dataset and test_dataarray over here From 436b6e372c78595e05adef0a1d975d26f365d338 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 09:17:45 -0600 Subject: [PATCH 18/25] Some more error raising and testing. --- xarray/core/groupby.py | 6 +++++- xarray/tests/test_groupby.py | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 6c33040a84d..43b58fb085b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -321,6 +321,8 @@ def __init__( full_index = None if bins is not None: + if np.isnan(bins).all(): + raise ValueError("All bin edges are NaN.") binned = pd.cut(group.values, bins, **cut_kwargs) new_dim_name = group.name + "_bins" group = DataArray(binned, group.coords, name=new_dim_name) @@ -387,7 +389,9 @@ def __init__( "None of the data falls within bins with edges %r" % bins ) else: - raise ValueError("Failed to group data.") + raise ValueError( + "Failed to group data. Are you grouping by a variable that is all NaN?" + ) example = obj.isel(**{group_dim: group_indices[0]}) self.dims = example.dims diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index ed328db2837..154d4f0b22f 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -255,7 +255,7 @@ def test_groupby_repr_datetime(obj): assert actual == expected -def test_groupby_no_groups(): +def test_groupby_grouping_errors(): dataset = xr.Dataset({"foo": ("x", [1, 1, 1])}, {"x": [1, 2, 3]}) with raises_regex(ValueError, "None of the data falls within bins with edges"): dataset.groupby_bins("x", bins=[0.1, 0.2, 0.3]) @@ -263,5 +263,11 @@ def test_groupby_no_groups(): with raises_regex(ValueError, "None of the data falls within bins with edges"): dataset.to_array().groupby_bins("x", bins=[0.1, 0.2, 0.3]) + with raises_regex(ValueError, "All bin edges are NaN."): + dataset.to_array().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) + + with raises_regex(ValueError, "Failed to group data."): + dataset.to_array().groupby(dataset.foo * np.nan) + # TODO: move other groupby tests from test_dataset and test_dataarray over here From de7cc7c49c4d96a8b3a360590b0a0ca3b41c4e6a Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 09:20:01 -0600 Subject: [PATCH 19/25] Add dataset tests. --- xarray/tests/test_groupby.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 154d4f0b22f..07bf9c91469 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -263,9 +263,15 @@ def test_groupby_grouping_errors(): with raises_regex(ValueError, "None of the data falls within bins with edges"): dataset.to_array().groupby_bins("x", bins=[0.1, 0.2, 0.3]) + with raises_regex(ValueError, "All bin edges are NaN."): + dataset.groupby_bins("x", bins=[np.nan, np.nan, np.nan]) + with raises_regex(ValueError, "All bin edges are NaN."): dataset.to_array().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) + with raises_regex(ValueError, "Failed to group data."): + dataset.groupby(dataset.foo * np.nan) + with raises_regex(ValueError, "Failed to group data."): dataset.to_array().groupby(dataset.foo * np.nan) From 5269e815e76bb66e50cd2647e5ad59c71c83d214 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 09:21:08 -0600 Subject: [PATCH 20/25] update whats-new. --- doc/whats-new.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8c2a9f3aa7a..3ab7a8d7e18 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,8 +45,7 @@ Bug fixes now plot the correct data for 2D DataArrays. (:issue:`3334`). By `Tom Nicholas `_. - Fix deprecation of default reduction dimension for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects. - (:issue:`3337`). Also make sure that grouping dimensions with ``dtype='object'`` indexes - is squeezed just as for other indexes. By `Deepak Cherian `_. + (:issue:`3337`). Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ From bd5b5fd1c6463c454418ecab36d2357e769163e3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Oct 2019 10:46:44 -0600 Subject: [PATCH 21/25] fix tests. --- xarray/core/groupby.py | 2 +- xarray/tests/test_groupby.py | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 43b58fb085b..519b91538bd 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -383,7 +383,7 @@ def __init__( # cached attributes self._groups = None - if not group_indices: + if len(group_indices) == 0: if bins is not None: raise ValueError( "None of the data falls within bins with edges %r" % bins diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 07bf9c91469..8a30dc6025c 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -234,24 +234,29 @@ def test_groupby_repr(obj, dim): expected = "%sGroupBy" % obj.__class__.__name__ expected += ", grouped over %r " % dim expected += "\n%r groups with labels " % (len(np.unique(obj[dim]))) + dims = list(obj.dims) if dim == "x": - expected += "1, 2, 3, 4, 5" + expected += "1, 2, 3, 4, 5. " elif dim == "y": - expected += "0, 1, 2, 3, 4, 5, ..., 15, 16, 17, 18, 19" + expected += "0, 1, 2, 3, 4, 5, ..., 15, 16, 17, 18, 19. " + dims = list(obj.isel(y=1).dims) elif dim == "z": - expected += "'a', 'b', 'c'" + expected += "'a', 'b', 'c'. " elif dim == "month": - expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12" + expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12. " + expected += "\nEach group has dimensions: %r" % dims assert actual == expected @pytest.mark.parametrize("obj", [repr_da, repr_da.to_dataset(name="a")]) def test_groupby_repr_datetime(obj): + dims = list(obj.dims) actual = repr(obj.groupby("t.month")) expected = "%sGroupBy" % obj.__class__.__name__ expected += ", grouped over 'month' " expected += "\n%r groups with labels " % (len(np.unique(obj.t.dt.month))) - expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12" + expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12. " + expected += "\nEach group has dimensions: %r" % dims assert actual == expected From 2cb7420f81b9b2d45d1f6baf0d19f6ebd8c3a0f4 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 9 Oct 2019 07:49:33 -0600 Subject: [PATCH 22/25] make dims a cached lazy property. --- xarray/core/groupby.py | 45 +++++++++++++++++++----------------- xarray/tests/test_groupby.py | 15 ++++-------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 519b91538bd..c52b0dc97e6 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -248,7 +248,7 @@ class GroupBy(SupportsArithmetic): "_restore_coord_dims", "_stacked_dim", "_unique_coord", - "dims", + "_dims", ) def __init__( @@ -354,6 +354,16 @@ def __init__( ) unique_coord = IndexVariable(group.name, unique_values) + if len(group_indices) == 0: + if bins is not None: + raise ValueError( + "None of the data falls within bins with edges %r" % bins + ) + else: + raise ValueError( + "Failed to group data. Are you grouping by a variable that is all NaN?" + ) + if ( isinstance(obj, DataArray) and restore_coord_dims is None @@ -382,19 +392,16 @@ def __init__( # cached attributes self._groups = None + self._dims = None - if len(group_indices) == 0: - if bins is not None: - raise ValueError( - "None of the data falls within bins with edges %r" % bins - ) - else: - raise ValueError( - "Failed to group data. Are you grouping by a variable that is all NaN?" - ) + @property + def dims(self): + if self._dims is None: + self._dims = self._obj.isel( + **{self._group_dim: self._group_indices[0]} + ).dims - example = obj.isel(**{group_dim: group_indices[0]}) - self.dims = example.dims + return self._dims @property def groups(self): @@ -410,15 +417,11 @@ def __iter__(self): return zip(self._unique_coord.values, self._iter_grouped()) def __repr__(self): - return ( - "%s, grouped over %r \n%r groups with labels %s. \nEach group has dimensions: %r" - % ( - self.__class__.__name__, - self._unique_coord.name, - self._unique_coord.size, - ", ".join(format_array_flat(self._unique_coord, 30).split()), - list(self.dims), - ) + return "%s, grouped over %r \n%r groups with labels %s." % ( + self.__class__.__name__, + self._unique_coord.name, + self._unique_coord.size, + ", ".join(format_array_flat(self._unique_coord, 30).split()), ) def _get_index_and_items(self, index, grouper): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8a30dc6025c..957d4800c40 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -234,29 +234,24 @@ def test_groupby_repr(obj, dim): expected = "%sGroupBy" % obj.__class__.__name__ expected += ", grouped over %r " % dim expected += "\n%r groups with labels " % (len(np.unique(obj[dim]))) - dims = list(obj.dims) if dim == "x": - expected += "1, 2, 3, 4, 5. " + expected += "1, 2, 3, 4, 5." elif dim == "y": - expected += "0, 1, 2, 3, 4, 5, ..., 15, 16, 17, 18, 19. " - dims = list(obj.isel(y=1).dims) + expected += "0, 1, 2, 3, 4, 5, ..., 15, 16, 17, 18, 19." elif dim == "z": - expected += "'a', 'b', 'c'. " + expected += "'a', 'b', 'c'." elif dim == "month": - expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12. " - expected += "\nEach group has dimensions: %r" % dims + expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12." assert actual == expected @pytest.mark.parametrize("obj", [repr_da, repr_da.to_dataset(name="a")]) def test_groupby_repr_datetime(obj): - dims = list(obj.dims) actual = repr(obj.groupby("t.month")) expected = "%sGroupBy" % obj.__class__.__name__ expected += ", grouped over 'month' " expected += "\n%r groups with labels " % (len(np.unique(obj.t.dt.month))) - expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12. " - expected += "\nEach group has dimensions: %r" % dims + expected += "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12." assert actual == expected From b8bc3d200430efa92f691eb13d588b5fe5d91b78 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 9 Oct 2019 07:53:07 -0600 Subject: [PATCH 23/25] fix whats-new. --- doc/whats-new.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3ab7a8d7e18..24a6f65e4da 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,8 +44,12 @@ Bug fixes - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord now plot the correct data for 2D DataArrays. (:issue:`3334`). By `Tom Nicholas `_. -- Fix deprecation of default reduction dimension for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects. - (:issue:`3337`). Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. +- The default behaviour of reducing across all dimensions for + :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects has now been properly removed + as was done for :py:class:`~xarray.core.groupby.DatasetGroupBy` in 0.13.0 (:issue:`3337`). + Use `xarray.ALL_DIMS` if you need to replicate previous behaviour. + Also raise nicer error message when no groups are created (:issue:`1764`). + By `Deepak Cherian `_. Documentation ~~~~~~~~~~~~~ From 74045082f9d7ab011e683da7b0a6346dde77484b Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 9 Oct 2019 09:04:34 -0600 Subject: [PATCH 24/25] whitespace --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2852206f3be..36c0b2cf6b0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -80,7 +80,7 @@ Bug fixes (:issue:`3337`). Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_. - + Documentation ~~~~~~~~~~~~~ From 40ad11cdcc63862c4d6b5eafb75164ae7b3d9fd2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 9 Oct 2019 09:05:20 -0600 Subject: [PATCH 25/25] fix whats-new --- doc/whats-new.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 36c0b2cf6b0..5088293e179 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -75,9 +75,6 @@ Bug fixes Use `xarray.ALL_DIMS` if you need to replicate previous behaviour. Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - -- Fix deprecation of default reduction dimension for :py:class:`~xarray.core.groupby.DataArrayGroupBy` objects. - (:issue:`3337`). Also raise nicer error message when no groups are created (:issue:`1764`). By `Deepak Cherian `_. - Fix error in concatenating unlabeled dimensions (:pull:`3362`). By `Deepak Cherian `_.