From 9221aac92e7cc2c6527b10afa18aba996f55f1fd Mon Sep 17 00:00:00 2001
From: dcherian
Date: Tue, 4 Jan 2022 19:48:41 -0700
Subject: [PATCH 01/15] Vectorize groupby math

---
 xarray/core/groupby.py | 82 ++++++++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 26 deletions(-)

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 68db14c29be..6f108a3aa38 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -262,6 +262,9 @@ class GroupBy:
         "_stacked_dim",
         "_unique_coord",
         "_dims",
+        "_original_obj",
+        "_original_group",
+        "_bins",
     )
 
     def __init__(
@@ -323,6 +326,11 @@ def __init__(
         if getattr(group, "name", None) is None:
             group.name = "group"
 
+        # save object before any stacking
+        self._original_obj = obj
+        self._original_group = group
+        self._bins = bins
+
         group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj)
         (group_dim,) = group.dims
 
@@ -476,34 +484,56 @@ def _infer_concat_args(self, applied_example):
 
     def _binary_op(self, other, f, reflexive=False):
         g = f if not reflexive else lambda x, y: f(y, x)
-        applied = self._yield_binary_applied(g, other)
-        return self._combine(applied)
-
-    def _yield_binary_applied(self, func, other):
-        dummy = None
-        for group_value, obj in self:
-            try:
-                other_sel = other.sel(**{self._group.name: group_value})
-            except AttributeError:
-                raise TypeError(
-                    "GroupBy objects only support binary ops "
-                    "when the other argument is a Dataset or "
-                    "DataArray"
+        obj = self._original_obj
+        group = self._original_group
+        name = group.name
+        dim = self._group_dim
+        # import IPython; IPython.core.debugger.set_trace()
+        try:
+            if self._bins is not None:
+                other = other.sel({f"{name}_bins": self._group})
+                if isinstance(group, _DummyGroup):
+                    # When binning by unindexed coordinate we need to reindex obj.
+                    # _full_index is IntervalIndex, so idx will be -1 where
+                    # a value does not belong to any bin. Using IntervalIndex
+                    # accounts for any non-default cut_kwargs passed to the constructor
+                    idx = pd.cut(obj[dim], bins=self._full_index).codes
+                    obj = obj.isel({dim: np.arange(group.size)[idx != -1]})
+            else:
+                if isinstance(group, _DummyGroup):
+                    group = obj[dim]
+                other = other.sel({name: group})
+        except AttributeError:
+            raise TypeError(
+                "GroupBy objects only support binary ops "
+                "when the other argument is a Dataset or "
+                "DataArray"
+            )
+        except (KeyError, ValueError):
+            if name not in other.dims:
+                raise ValueError(
+                    "incompatible dimensions for a grouped "
+                    f"binary operation: the group variable {name!r} "
+                    "is not a dimension on the other argument"
                 )
-            except (KeyError, ValueError):
-                if self._group.name not in other.dims:
-                    raise ValueError(
-                        "incompatible dimensions for a grouped "
-                        f"binary operation: the group variable {self._group.name!r} "
-                        "is not a dimension on the other argument"
-                    )
-                if dummy is None:
-                    dummy = _dummy_copy(other)
-                other_sel = dummy
-
-            result = func(obj, other_sel)
-            yield result
+            # some labels are absent i.e. other is not aligned
+            # so we align by reindexing and then rename dimensions.
+            # TODO: probably need to copy some coordinates over
+            other = (
+                other.reindex({name: group.data})
+                .rename({name: dim})
+                .assign_coords({dim: obj[dim]})
+            )
+
+        result = g(obj, other)
+
+        # backcompat: concat during the "combine" step places
+        # `dim` as the first dimension
+        if dim in result.dims:
+            # guards against self._group_dim being "stacked"
+            result = result.transpose(dim, ...)
+ return result def _maybe_restore_empty_groups(self, combined): """Our index contained empty groups (e.g., from a resampling). If we From 6a158e1cb6c6a2e2674d066bbba7d09b6db6ec1f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 4 Jan 2022 19:50:56 -0700 Subject: [PATCH 02/15] WIP --- xarray/core/groupby.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 6f108a3aa38..3eeca84ec3c 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -492,7 +492,11 @@ def _binary_op(self, other, f, reflexive=False): # import IPython; IPython.core.debugger.set_trace() try: if self._bins is not None: - other = other.sel({f"{name}_bins": self._group}) + if self._stacked_dim is not None: + group = self._group.unstack() + # idx = pd.factorize(group.data.ravel())[0] + # group_idx = group.copy(data=idx.reshape(group.data.shape)) + other = other.sel({f"{name}_bins": group}) if isinstance(group, _DummyGroup): # When binning by unindexed coordinate we need to reindex obj. # _full_index is IntervalIndex, so idx will be -1 where @@ -528,11 +532,12 @@ def _binary_op(self, other, f, reflexive=False): result = g(obj, other) - # backcompat: concat during the "combine" step places - # `dim` as the first dimension - if dim in result.dims: - # guards against self._group_dim being "stacked" - result = result.transpose(dim, ...) + # backcompat: + for var in set(obj.coords) - set(obj.xindexes): + print(var) + if dim not in obj[var]: + print(f"excluding {dim}, broadcasting {var}") + result[var] = obj[var].reset_coords(drop=True).broadcast_like(result) return result def _maybe_restore_empty_groups(self, combined): From 24980f2a0daecc65f1cc46413711a02c4ca85a34 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 17 Mar 2022 18:07:33 +0530 Subject: [PATCH 03/15] rework tests --- xarray/tests/test_groupby.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 4b6da82bdc7..5cb8fcfadd1 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1133,26 +1133,28 @@ def change_metadata(x): expected = change_metadata(expected) assert_equal(expected, actual) - def test_groupby_math(self): + @pytest.mark.parametrize("squeeze", [True, False]) + def test_groupby_math_squeeze(self, squeeze): array = self.da - for squeeze in [True, False]: - grouped = array.groupby("x", squeeze=squeeze) + grouped = array.groupby("x", squeeze=squeeze) - expected = array + array.coords["x"] - actual = grouped + array.coords["x"] - assert_identical(expected, actual) + expected = array + array.coords["x"] + actual = grouped + array.coords["x"] + assert_identical(expected, actual) - actual = array.coords["x"] + grouped - assert_identical(expected, actual) + actual = array.coords["x"] + grouped + assert_identical(expected, actual) - ds = array.coords["x"].to_dataset(name="X") - expected = array + ds - actual = grouped + ds - assert_identical(expected, actual) + ds = array.coords["x"].to_dataset(name="X") + expected = array + ds + actual = grouped + ds + assert_identical(expected, actual) - actual = ds + grouped - assert_identical(expected, actual) + actual = ds + grouped + assert_identical(expected, actual) + def test_groupby_math(self): + array = self.da grouped = array.groupby("abc") expected_agg = (grouped.mean(...) 
- np.arange(3)).rename(None) actual = grouped - DataArray(range(3), [("abc", ["a", "b", "c"])]) From 050ea70d65e84d6f719110f4459e431c2fcb0a94 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 17 Mar 2022 22:36:12 +0530 Subject: [PATCH 04/15] working! --- xarray/core/groupby.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3eeca84ec3c..36a65f52b51 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -489,25 +489,28 @@ def _binary_op(self, other, f, reflexive=False): group = self._original_group name = group.name dim = self._group_dim - # import IPython; IPython.core.debugger.set_trace() + try: if self._bins is not None: - if self._stacked_dim is not None: - group = self._group.unstack() - # idx = pd.factorize(group.data.ravel())[0] - # group_idx = group.copy(data=idx.reshape(group.data.shape)) - other = other.sel({f"{name}_bins": group}) - if isinstance(group, _DummyGroup): + group = self._maybe_unstack(self._group) + other = other.sel({f"{name}_bins": self._group}) + # TODO: vectorized indexing bug in .sel; name_bins is still an IndexVariable! + other[f"{name}_bins"] = other[f"{name}_bins"].variable.to_variable() + if name == dim and dim not in obj.xindexes: # When binning by unindexed coordinate we need to reindex obj. # _full_index is IntervalIndex, so idx will be -1 where # a value does not belong to any bin. Using IntervalIndex # accounts for any non-default cut_kwargs passed to the constructor idx = pd.cut(obj[dim], bins=self._full_index).codes - obj = obj.isel({dim: np.arange(group.size)[idx != -1]}) + obj = obj.isel({dim: np.arange(obj[dim].size)[idx != -1]}) + else: + obj = self._obj + else: if isinstance(group, _DummyGroup): group = obj[dim] other = other.sel({name: group}) + except AttributeError: raise TypeError( "GroupBy objects only support binary ops " @@ -523,7 +526,12 @@ def _binary_op(self, other, f, reflexive=False): ) # some labels are absent i.e. other is not aligned # so we align by reindexing and then rename dimensions. - # TODO: probably need to copy some coordinates over + # Broadcast out scalars. + for var in other.coords: + if other[var].ndim == 0: + other[var] = ( + other[var].drop(var).expand_dims({name: other.sizes[name]}) + ) other = ( other.reindex({name: group.data}) .rename({name: dim}) @@ -532,12 +540,13 @@ def _binary_op(self, other, f, reflexive=False): result = g(obj, other) - # backcompat: - for var in set(obj.coords) - set(obj.xindexes): - print(var) - if dim not in obj[var]: - print(f"excluding {dim}, broadcasting {var}") - result[var] = obj[var].reset_coords(drop=True).broadcast_like(result) + if group.ndim > 1: + # backcompat: + for var in set(obj.coords) - set(obj.xindexes): + if set(obj[var].dims) < set(group.dims): + result[var] = obj[var].reset_coords(drop=True).broadcast_like(group) + + result = self._maybe_unstack(result).transpose(*group.dims, ...) 
return result def _maybe_restore_empty_groups(self, combined): From 291656f6e5eee7ef88990dec1f40918c1c0b5756 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Mar 2022 08:04:17 +0530 Subject: [PATCH 05/15] Cleanup --- xarray/core/groupby.py | 80 ++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 9476d73d0b8..2fd97cd9648 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -264,8 +264,6 @@ class GroupBy: "_stacked_dim", "_unique_coord", "_dims", - "_original_obj", - "_original_group", "_bins", ) @@ -328,11 +326,6 @@ def __init__( if getattr(group, "name", None) is None: group.name = "group" - # save object before any stacking - self._original_obj = obj - self._original_group = group - self._bins = bins - group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) (group_dim,) = group.dims @@ -409,6 +402,7 @@ def __init__( self._inserted_dims = inserted_dims self._full_index = full_index self._restore_coord_dims = restore_coord_dims + self._bins = bins # cached attributes self._groups = None @@ -486,50 +480,41 @@ def _infer_concat_args(self, applied_example): return coord, dim, positions def _binary_op(self, other, f, reflexive=False): + from .dataarray import DataArray + from .dataset import Dataset + g = f if not reflexive else lambda x, y: f(y, x) - obj = self._original_obj - group = self._original_group - name = group.name + obj = self._obj + name = self._group.name dim = self._group_dim - try: - if self._bins is not None: - group = self._maybe_unstack(self._group) - other = other.sel({f"{name}_bins": self._group}) - # TODO: vectorized indexing bug in .sel; name_bins is still an IndexVariable! - other[f"{name}_bins"] = other[f"{name}_bins"].variable.to_variable() - if name == dim and dim not in obj.xindexes: - # When binning by unindexed coordinate we need to reindex obj. - # _full_index is IntervalIndex, so idx will be -1 where - # a value does not belong to any bin. Using IntervalIndex - # accounts for any non-default cut_kwargs passed to the constructor - idx = pd.cut(obj[dim], bins=self._full_index).codes - obj = obj.isel({dim: np.arange(obj[dim].size)[idx != -1]}) - else: - obj = self._obj - - else: - if isinstance(group, _DummyGroup): - group = obj[dim] - other = other.sel({name: group}) - - except AttributeError: + if not isinstance(other, (Dataset, DataArray)): raise TypeError( "GroupBy objects only support binary ops " "when the other argument is a Dataset or " "DataArray" ) - except (KeyError, ValueError): - if name not in other.dims: - raise ValueError( - "incompatible dimensions for a grouped " - f"binary operation: the group variable {name!r} " - "is not a dimension on the other argument" - ) + + if name not in other.dims: + raise ValueError( + "incompatible dimensions for a grouped " + f"binary operation: the group variable {name!r} " + "is not a dimension on the other argument" + ) + + group = self._group + if isinstance(group, _DummyGroup): + group = obj[dim] + + try: + other = other.sel({name: group}) + except KeyError: # some labels are absent i.e. other is not aligned # so we align by reindexing and then rename dimensions. - # Broadcast out scalars. + + # Broadcast out scalars for backwards compatibility + # TODO: get rid of this. 
for var in other.coords: if other[var].ndim == 0: other[var] = ( @@ -541,15 +526,28 @@ def _binary_op(self, other, f, reflexive=False): .assign_coords({dim: obj[dim]}) ) + if self._bins is not None: + # TODO: vectorized indexing bug in .sel; name_bins is still an IndexVariable! + other[name] = other[name].variable.to_base_variable() + if name == dim and dim not in obj.xindexes: + # When binning by unindexed coordinate we need to reindex obj. + # _full_index is IntervalIndex, so idx will be -1 where + # a value does not belong to any bin. Using IntervalIndex + # accounts for any non-default cut_kwargs passed to the constructor + idx = pd.cut(obj[dim], bins=self._full_index).codes + obj = obj.isel({dim: np.arange(obj[dim].size)[idx != -1]}) + result = g(obj, other) + result = self._maybe_unstack(result) + group = self._maybe_unstack(group) if group.ndim > 1: # backcompat: for var in set(obj.coords) - set(obj.xindexes): if set(obj[var].dims) < set(group.dims): result[var] = obj[var].reset_coords(drop=True).broadcast_like(group) - result = self._maybe_unstack(result).transpose(*group.dims, ...) + result = result.transpose(*group.dims, ...) return result def _maybe_restore_empty_groups(self, combined): From 97974b6e2d737901526011c6ccd47bdc2c9e9980 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Mar 2022 13:29:57 +0530 Subject: [PATCH 06/15] Add benchmarks --- asv_bench/benchmarks/groupby.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 46d6293cc98..a399ef29ef8 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -16,6 +16,8 @@ def setup(self, *args, **kwargs): } ) self.ds2d = self.ds1d.expand_dims(z=10) + self.ds1d_mean = self.ds1d.groupby("b").mean() + self.ds2d_mean = self.ds2d.groupby("b").mean() @parameterized(["ndim"], [(1, 2)]) def time_init(self, ndim): @@ -31,6 +33,12 @@ def time_agg_large_num_groups(self, method, ndim): ds = getattr(self, f"ds{ndim}d") getattr(ds.groupby("b"), method)() + def time_groupby_binary_op_1d(self): + self.ds1d - self.ds1d_mean + + def time_groupby_binary_op_2d(self): + self.ds2d - self.ds2d_mean + class GroupByDask(GroupBy): def setup(self, *args, **kwargs): @@ -40,6 +48,8 @@ def setup(self, *args, **kwargs): self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk( {"dim_0": 50, "z": 5} ) + self.ds1d_mean = self.ds1d.groupby("b").mean() + self.ds2d_mean = self.ds2d.groupby("b").mean() class GroupByPandasDataFrame(GroupBy): @@ -51,6 +61,13 @@ def setup(self, *args, **kwargs): super().setup(**kwargs) self.ds1d = self.ds1d.to_dataframe() + self.ds1d_mean = self.ds1d.groupby("b").mean() + + def time_groupby_binary_op_1d(self): + self.ds1d - self.ds1d_bmean + + def time_groupby_binary_op_2d(self): + raise NotImplementedError class GroupByDaskDataFrame(GroupBy): @@ -63,6 +80,13 @@ def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe() + self.ds1d_mean = self.ds1d.groupby("b").mean() + + def time_groupby_binary_op_1d(self): + self.ds1d - self.ds1d_bmean + + def time_groupby_binary_op_2d(self): + raise NotImplementedError class Resample: @@ -74,6 +98,8 @@ def setup(self, *args, **kwargs): coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)}, ) self.ds2d = self.ds1d.expand_dims(z=10) + self.ds1d_mean = self.ds1d.resample(time="48H").mean() + self.ds2d_mean = self.ds2d.resample(time="48H").mean() @parameterized(["ndim"], 
[(1, 2)]) def time_init(self, ndim): @@ -89,6 +115,12 @@ def time_agg_large_num_groups(self, method, ndim): ds = getattr(self, f"ds{ndim}d") getattr(ds.resample(time="48H"), method)() + def time_groupby_binary_op_1d(self): + self.ds1d - self.ds1d_bmean + + def time_groupby_binary_op_2d(self): + self.ds2d - self.ds2d_mean + class ResampleDask(Resample): def setup(self, *args, **kwargs): From d5127a74d20a8d9395f7f44b1c863d0f3a18df5a Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Mar 2022 13:48:08 +0530 Subject: [PATCH 07/15] Add whats-new note --- doc/whats-new.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 05bdfcf78bb..7b96dac922d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -52,6 +52,12 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +Performance +~~~~~~~~~~~ + +- GroupBy binary operations are now vectorized. + Previously this involved looping over all groups. (:issue:`5804`,:pull:`6160`) + By `Deepak Cherian `_. Internal Changes ~~~~~~~~~~~~~~~~ From 1adbb664b8aefc8f6eba570dc66bba52ffb59f7f Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Mar 2022 15:29:06 +0530 Subject: [PATCH 08/15] fix benchmark --- asv_bench/benchmarks/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index a399ef29ef8..309a1a903f0 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -64,7 +64,7 @@ def setup(self, *args, **kwargs): self.ds1d_mean = self.ds1d.groupby("b").mean() def time_groupby_binary_op_1d(self): - self.ds1d - self.ds1d_bmean + self.ds1d - self.ds1d_mean def time_groupby_binary_op_2d(self): raise NotImplementedError @@ -83,7 +83,7 @@ def setup(self, *args, **kwargs): self.ds1d_mean = self.ds1d.groupby("b").mean() def time_groupby_binary_op_1d(self): - self.ds1d - self.ds1d_bmean + self.ds1d - self.ds1d_mean def time_groupby_binary_op_2d(self): raise NotImplementedError @@ -116,7 +116,7 @@ def time_agg_large_num_groups(self, method, ndim): getattr(ds.resample(time="48H"), method)() def time_groupby_binary_op_1d(self): - self.ds1d - self.ds1d_bmean + self.ds1d - self.ds1d_mean def time_groupby_binary_op_2d(self): self.ds2d - self.ds2d_mean From a0e686532f4d8e2d61c89af65a9e17f3b4147caf Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 18 Mar 2022 16:15:52 +0530 Subject: [PATCH 09/15] Fix output dimension order --- xarray/core/groupby.py | 7 +++++-- xarray/tests/test_groupby.py | 11 +++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 2fd97cd9648..aa28f469b77 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -518,7 +518,7 @@ def _binary_op(self, other, f, reflexive=False): for var in other.coords: if other[var].ndim == 0: other[var] = ( - other[var].drop(var).expand_dims({name: other.sizes[name]}) + other[var].drop_vars(var).expand_dims({name: other.sizes[name]}) ) other = ( other.reindex({name: group.data}) @@ -547,7 +547,10 @@ def _binary_op(self, other, f, reflexive=False): if set(obj[var].dims) < set(group.dims): result[var] = obj[var].reset_coords(drop=True).broadcast_like(group) - result = result.transpose(*group.dims, ...) + if isinstance(result, Dataset) and isinstance(obj, Dataset): + for var in set(result): + if dim not in obj[var].dims: + result[var] = result[var].transpose(dim, ...) 
return result def _maybe_restore_empty_groups(self, combined): diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8e1855789ee..4e92f009987 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -857,6 +857,17 @@ def test_groupby_dataset_math_virtual() -> None: assert_identical(actual, expected) +def test_groupby_math_dim_order() -> None: + da = DataArray( + np.ones((10, 10, 12)), + dims=("x", "y", "time"), + coords={"time": pd.date_range("2001-01-01", periods=12, freq="6H")}, + ) + grouped = da.groupby("time.day") + result = grouped - grouped.mean() + assert result.dims == da.dims + + def test_groupby_dataset_nan() -> None: # nan should be excluded from groupby ds = Dataset({"foo": ("x", [1, 2, 3, 4])}, {"bar": ("x", [1, 1, 2, np.nan])}) From fd3031c66c7f24a82d378d784f9c501d8bfee42c Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 19 Mar 2022 10:21:44 +0530 Subject: [PATCH 10/15] cleanup --- asv_bench/benchmarks/groupby.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 309a1a903f0..60c960abc55 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -63,9 +63,6 @@ def setup(self, *args, **kwargs): self.ds1d = self.ds1d.to_dataframe() self.ds1d_mean = self.ds1d.groupby("b").mean() - def time_groupby_binary_op_1d(self): - self.ds1d - self.ds1d_mean - def time_groupby_binary_op_2d(self): raise NotImplementedError @@ -82,9 +79,6 @@ def setup(self, *args, **kwargs): self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe() self.ds1d_mean = self.ds1d.groupby("b").mean() - def time_groupby_binary_op_1d(self): - self.ds1d - self.ds1d_mean - def time_groupby_binary_op_2d(self): raise NotImplementedError From bae15d5c250cb0f1e903c404756e12a10f59a56d Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Mar 2022 10:19:38 +0530 Subject: [PATCH 11/15] comment --- xarray/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index aa28f469b77..866099c1e54 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -514,7 +514,7 @@ def _binary_op(self, other, f, reflexive=False): # so we align by reindexing and then rename dimensions. # Broadcast out scalars for backwards compatibility - # TODO: get rid of this. + # TODO: get rid of this when fixing GH2145 for var in other.coords: if other[var].ndim == 0: other[var] = ( From cc69d2d41e8766948e3be4c4b5c0b105b744f4ef Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Mar 2022 10:22:02 +0530 Subject: [PATCH 12/15] cleanup --- xarray/core/groupby.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index aecc3d5e45d..056dcd98605 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -526,16 +526,13 @@ def _binary_op(self, other, f, reflexive=False): .assign_coords({dim: obj[dim]}) ) - if self._bins is not None: - # TODO: vectorized indexing bug in .sel; name_bins is still an IndexVariable! - other[name] = other[name].variable.to_base_variable() - if name == dim and dim not in obj.xindexes: - # When binning by unindexed coordinate we need to reindex obj. - # _full_index is IntervalIndex, so idx will be -1 where - # a value does not belong to any bin. 
Using IntervalIndex - # accounts for any non-default cut_kwargs passed to the constructor - idx = pd.cut(obj[dim], bins=self._full_index).codes - obj = obj.isel({dim: np.arange(obj[dim].size)[idx != -1]}) + if self._bins is not None and name == dim and dim not in obj.xindexes: + # When binning by unindexed coordinate we need to reindex obj. + # _full_index is IntervalIndex, so idx will be -1 where + # a value does not belong to any bin. Using IntervalIndex + # accounts for any non-default cut_kwargs passed to the constructor + idx = pd.cut(obj[dim], bins=self._full_index).codes + obj = obj.isel({dim: np.arange(obj[dim].size)[idx != -1]}) result = g(obj, other) From cc5e8a4eb7363d2316350828b719ad232f4fd3b5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Mar 2022 10:44:18 +0530 Subject: [PATCH 13/15] cleanup --- xarray/core/groupby.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 056dcd98605..ab6e02cde31 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -486,8 +486,11 @@ def _binary_op(self, other, f, reflexive=False): g = f if not reflexive else lambda x, y: f(y, x) obj = self._obj - name = self._group.name + group = self._group dim = self._group_dim + if isinstance(group, _DummyGroup): + group = obj[dim] + name = group.name if not isinstance(other, (Dataset, DataArray)): raise TypeError( @@ -503,10 +506,6 @@ def _binary_op(self, other, f, reflexive=False): "is not a dimension on the other argument" ) - group = self._group - if isinstance(group, _DummyGroup): - group = obj[dim] - try: other = other.sel({name: group}) except KeyError: @@ -531,8 +530,8 @@ def _binary_op(self, other, f, reflexive=False): # _full_index is IntervalIndex, so idx will be -1 where # a value does not belong to any bin. Using IntervalIndex # accounts for any non-default cut_kwargs passed to the constructor - idx = pd.cut(obj[dim], bins=self._full_index).codes - obj = obj.isel({dim: np.arange(obj[dim].size)[idx != -1]}) + idx = pd.cut(group, bins=self._full_index).codes + obj = obj.isel({dim: np.arange(group.size)[idx != -1]}) result = g(obj, other) @@ -540,6 +539,7 @@ def _binary_op(self, other, f, reflexive=False): group = self._maybe_unstack(group) if group.ndim > 1: # backcompat: + # TODO: get rid of this when fixing GH2145 for var in set(obj.coords) - set(obj.xindexes): if set(obj[var].dims) < set(group.dims): result[var] = obj[var].reset_coords(drop=True).broadcast_like(group) From 169e51a28b1734a58c8fa59d588e38450763e358 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 21 Mar 2022 10:48:37 +0530 Subject: [PATCH 14/15] More cleanup --- xarray/core/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ab6e02cde31..14d6a7773b0 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -507,7 +507,7 @@ def _binary_op(self, other, f, reflexive=False): ) try: - other = other.sel({name: group}) + expanded = other.sel({name: group}) except KeyError: # some labels are absent i.e. other is not aligned # so we align by reindexing and then rename dimensions. 
@@ -519,7 +519,7 @@ def _binary_op(self, other, f, reflexive=False):
                 other[var] = (
                     other[var].drop_vars(var).expand_dims({name: other.sizes[name]})
                 )
-            other = (
+            expanded = (
                 other.reindex({name: group.data})
                 .rename({name: dim})
                 .assign_coords({dim: obj[dim]})
             )
@@ -533,7 +533,7 @@ def _binary_op(self, other, f, reflexive=False):
             idx = pd.cut(group, bins=self._full_index).codes
             obj = obj.isel({dim: np.arange(group.size)[idx != -1]})
 
-        result = g(obj, other)
+        result = g(obj, expanded)
 
         result = self._maybe_unstack(result)
         group = self._maybe_unstack(group)
         if group.ndim > 1:

From bb53249f4ad23e1a1e336207f1b0fcb724a105a1 Mon Sep 17 00:00:00 2001
From: Deepak Cherian
Date: Sun, 27 Mar 2022 22:07:23 +0530
Subject: [PATCH 15/15] Update xarray/core/groupby.py

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
---
 xarray/core/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 14d6a7773b0..952794dcef9 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -529,7 +529,7 @@ def _binary_op(self, other, f, reflexive=False):
             # When binning by unindexed coordinate we need to reindex obj.
             # _full_index is IntervalIndex, so idx will be -1 where
             # a value does not belong to any bin. Using IntervalIndex
-            # accounts for any non-default cut_kwargs passed to the constructor
+            # accounts for any non-default cut_kwargs passed to the constructor
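
Illustration (not part of the patch series): taken together, these patches replace the per-group Python loop in `GroupBy._binary_op` with a single vectorized selection/reindex of `other` along the group labels. The sketch below shows the user-facing behaviour the series targets; the dataset and variable names are invented (modelled on the new asv benchmarks and on `test_groupby_math_dim_order` added in patch 09), and it assumes an xarray build with these changes applied.

```python
import numpy as np
import pandas as pd
import xarray as xr

# Toy data modelled on the new benchmark: a 1D variable with a small set of
# repeated group labels "b" along "dim_0".
ds = xr.Dataset(
    {"a": ("dim_0", np.arange(100.0))},
    coords={"b": ("dim_0", np.tile([1, 2, 3, 4], 25))},
)

# Group-wise anomaly. With this series, the subtraction is evaluated in one
# vectorized step (selecting/reindexing `other` along the group labels)
# instead of looping over every group and concatenating the pieces.
anom = ds.groupby("b") - ds.groupby("b").mean()

# groupby_bins arithmetic goes through the same path; `other` is aligned
# against the "<name>_bins" coordinate created by the binning, and values
# that fall outside the bins are dropped (the `idx != -1` handling above).
binned = ds.groupby_bins("a", bins=[0, 25, 50, 75, 100])
binned_anom = binned - binned.mean()

# Input dimension order is preserved (cf. test_groupby_math_dim_order):
# the grouped dimension is no longer moved to the front by the binary op.
da = xr.DataArray(
    np.ones((10, 10, 12)),
    dims=("x", "y", "time"),
    coords={"time": pd.date_range("2001-01-01", periods=12, freq="6H")},
)
grouped = da.groupby("time.day")
assert (grouped - grouped.mean()).dims == da.dims
```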