From 164bfc6d8984ca2c16c4a9dc2859c1a040623e1a Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Wed, 31 Jul 2024 21:49:02 +0000
Subject: [PATCH] fix: Fix caching from generating row numbers in partial
 ordering mode

---
 bigframes/series.py                  |  2 +-
 bigframes/session/__init__.py        |  4 +++-
 tests/system/small/test_unordered.py | 15 ++++++++++++++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 1a5661529c..2ff4f8f455 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -641,7 +641,7 @@ def head(self, n: int = 5) -> Series:
     def tail(self, n: int = 5) -> Series:
         return typing.cast(Series, self.iloc[-n:])
 
-    def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame:
+    def peek(self, n: int = 5, *, force: bool = True) -> pandas.Series:
         """
         Preview n arbitrary elements from the series without guarantees about row selection or ordering.
 
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 98cba867f2..5e06469974 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1992,8 +1992,10 @@ def _cache_with_session_awareness(self, array_value: core.ArrayValue) -> None:
         )
         if len(cluster_cols) > 0:
             self._cache_with_cluster_cols(core.ArrayValue(target), cluster_cols)
-        else:
+        elif self._strictly_ordered:
             self._cache_with_offsets(core.ArrayValue(target))
+        else:
+            self._cache_with_cluster_cols(core.ArrayValue(target), [])
 
     def _simplify_with_caching(self, array_value: core.ArrayValue):
         """Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces."""
diff --git a/tests/system/small/test_unordered.py b/tests/system/small/test_unordered.py
index 7d7097ceb3..df967e532e 100644
--- a/tests/system/small/test_unordered.py
+++ b/tests/system/small/test_unordered.py
@@ -17,7 +17,11 @@
 
 import bigframes.exceptions
 import bigframes.pandas as bpd
-from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
+from tests.system.utils import (
+    assert_pandas_df_equal,
+    assert_series_equal,
+    skip_legacy_pandas,
+)
 
 
 def test_unordered_mode_sql_no_hash(unordered_session):
@@ -49,6 +53,15 @@ def test_unordered_mode_cache_aggregate(unordered_session):
     assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
 
+def test_unordered_mode_series_peek(unordered_session):
+    pd_series = pd.Series([1, 2, 3, 4, 5, 6], dtype=pd.Int64Dtype())
+    bf_series = bpd.Series(pd_series, session=unordered_session)
+    pd_result = pd_series.groupby(pd_series % 4).sum()
+    bf_peek = bf_series.groupby(bf_series % 4).sum().peek(2)
+
+    assert_series_equal(bf_peek, pd_result.reindex(bf_peek.index))
+
+
 def test_unordered_mode_single_aggregate(unordered_session):
     pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
     bf_df = bpd.DataFrame(pd_df, session=unordered_session)