diff --git a/xarray/groupers.py b/xarray/groupers.py
index 025f8fae486..4b93aa02ff5 100644
--- a/xarray/groupers.py
+++ b/xarray/groupers.py
@@ -319,7 +319,7 @@ class BinGrouper(Grouper):
         the resulting bins. If False, returns only integer indicators of the
         bins. This affects the type of the output container (see below).
         This argument is ignored when `bins` is an IntervalIndex. If True,
-        raises an error. When `ordered=False`, labels must be provided.
+        raises an error.
     retbins : bool, default False
         Whether to return the bins or not. Useful when bins is provided
         as a scalar.
@@ -394,8 +394,13 @@ def factorize(self, group: T_Group) -> EncodedGroups:
 
         # This seems silly, but it lets us have Pandas handle the complexity
         # of `labels`, `precision`, and `include_lowest`, even when group is a chunked array
-        dummy, _ = self._cut(np.array([0]).astype(group.dtype))
-        full_index = dummy.categories
+        # pd.cut ignores `labels` when `bins` is an IntervalIndex, so apply them ourselves.
+        if not isinstance(self.bins, pd.IntervalIndex):
+            dummy, _ = self._cut(np.array([0]).astype(group.dtype))
+            full_index = dummy.categories
+        else:
+            full_index = pd.Index(self.labels)
+
         if not by_is_chunked:
             uniques = np.sort(pd.unique(codes.data.ravel()))
             unique_values = full_index[uniques[uniques != -1]]
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 52ab8c4d232..e47019f55c8 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -1062,6 +1062,12 @@ def test_groupby_bins_cut_kwargs(use_flox: bool) -> None:
         ).mean()
     assert_identical(expected, actual)
 
+    with xr.set_options(use_flox=use_flox):
+        bins_index = pd.IntervalIndex.from_breaks(x_bins)
+        labels = ["one", "two", "three"]
+        actual = da.groupby(x=BinGrouper(bins=bins_index, labels=labels)).sum()
+        assert actual.xindexes["x_bins"].index.equals(pd.Index(labels))  # type: ignore[attr-defined]
+
 
 @pytest.mark.parametrize("indexed_coord", [True, False])
 @pytest.mark.parametrize(