From 3cefb6bfbbbba8a7c46812b042d181769edecde6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 14:42:48 +0200 Subject: [PATCH 01/10] Remove raise_if_dask --- flox/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index 58b89bf17..74c7588e7 100644 --- a/flox/core.py +++ b/flox/core.py @@ -54,11 +54,9 @@ def _is_arg_reduction(func: str | Aggregation) -> bool: return False -def _get_expected_groups(by, sort, *, raise_if_dask=True) -> pd.Index | None: +def _get_expected_groups(by, sort: bool) -> pd.Index: if is_duck_dask_array(by): - if raise_if_dask: - raise ValueError("Please provide expected_groups if not grouping by a numpy array.") - return None + raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) expected = pd.unique(flatby[~isnull(flatby)]) return _convert_expected_groups_to_index((expected,), isbin=(False,), sort=sort)[0] From 475542a0a626f3c8767a1a5d0dd410044d5170a0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 14:50:19 +0200 Subject: [PATCH 02/10] remove argument in in code. --- flox/core.py | 9 ++++++--- flox/xarray.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index 74c7588e7..78c00a337 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1149,9 +1149,12 @@ def dask_groupby_agg( ) else: intermediate = applied - if expected_groups is None: - expected_groups = _get_expected_groups(by_input, sort=sort, raise_if_dask=False) - group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) + if not is_duck_dask_array(by_input): + expected_groups = _get_expected_groups(by_input, sort=sort) + group_chunks = ((len(expected_groups),),) + else: + expected_groups = None + group_chunks = ((np.nan,),) if method == "map-reduce": # these are negative axis indices useful for concatenating the intermediates diff --git a/flox/xarray.py b/flox/xarray.py index c02959485..5f87bafe6 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -313,7 +313,7 @@ def xarray_reduce( f"Please provided bin edges for group variable {idx} " f"named {group_name} in expected_groups." ) - expect_ = _get_expected_groups(b_.data, sort=sort, raise_if_dask=True) + expect_ = _get_expected_groups(b_.data, sort=sort) else: expect_ = expect expect_index = _convert_expected_groups_to_index((expect_,), (isbin_,), sort=sort)[0] From 8a5c807227cc4d9ced506ca7f2b1787bc31b7be9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:19:46 +0200 Subject: [PATCH 03/10] Update core.py --- flox/core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flox/core.py b/flox/core.py index 78c00a337..07aaa1305 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1149,13 +1149,13 @@ def dask_groupby_agg( ) else: intermediate = applied - if not is_duck_dask_array(by_input): - expected_groups = _get_expected_groups(by_input, sort=sort) - group_chunks = ((len(expected_groups),),) - else: + if is_duck_dask_array(by_input): expected_groups = None - group_chunks = ((np.nan,),) - + # group_chunks = ((np.nan,),) + else: + expected_groups = _get_expected_groups(by_input, sort=sort) + # group_chunks = ((len(expected_groups),),) + group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) if method == "map-reduce": # these are negative axis indices useful for concatenating the intermediates neg_axis = tuple(range(-len(axis), 0)) From 9ae3742b7b5d7a0289412181fbd9f6928102bc68 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:29:09 +0200 Subject: [PATCH 04/10] Update core.py --- flox/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/core.py b/flox/core.py index 07aaa1305..c1a2537a3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1155,6 +1155,7 @@ def dask_groupby_agg( else: expected_groups = _get_expected_groups(by_input, sort=sort) # group_chunks = ((len(expected_groups),),) + group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) if method == "map-reduce": # these are negative axis indices useful for concatenating the intermediates From e48225209fdcf715bff9bc5afa73de1bf7a27516 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:29:59 +0200 Subject: [PATCH 05/10] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index c1a2537a3..c8cb8c3d4 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1155,8 +1155,8 @@ def dask_groupby_agg( else: expected_groups = _get_expected_groups(by_input, sort=sort) # group_chunks = ((len(expected_groups),),) - group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) + if method == "map-reduce": # these are negative axis indices useful for concatenating the intermediates neg_axis = tuple(range(-len(axis), 0)) From 252b702fa2866ffbd523dc42154f9ebad8f311d4 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:43:10 +0200 Subject: [PATCH 06/10] Update core.py --- flox/core.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/flox/core.py b/flox/core.py index c8cb8c3d4..52e8b415d 100644 --- a/flox/core.py +++ b/flox/core.py @@ -54,7 +54,7 @@ def _is_arg_reduction(func: str | Aggregation) -> bool: return False -def _get_expected_groups(by, sort: bool) -> pd.Index: +def _get_expected_groups(by, sort: bool, *, raise_if_dask=True) -> pd.Index: if is_duck_dask_array(by): raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) @@ -1149,12 +1149,13 @@ def dask_groupby_agg( ) else: intermediate = applied - if is_duck_dask_array(by_input): - expected_groups = None - # group_chunks = ((np.nan,),) - else: - expected_groups = _get_expected_groups(by_input, sort=sort) - # group_chunks = ((len(expected_groups),),) + # if is_duck_dask_array(by_input): + # expected_groups = None + # # group_chunks = ((np.nan,),) + # else: + # expected_groups = _get_expected_groups(by_input, sort=sort) + # # group_chunks = ((len(expected_groups),),) + expected_groups = _get_expected_groups(by_input, sort=sort, raise_if_dask=False) group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) if method == "map-reduce": From 98dc9b6cadc6ecd145f207abe4c1db8a62395fec Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 15:44:48 +0200 Subject: [PATCH 07/10] Update core.py --- flox/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index 52e8b415d..cc10da927 100644 --- a/flox/core.py +++ b/flox/core.py @@ -56,7 +56,9 @@ def _is_arg_reduction(func: str | Aggregation) -> bool: def _get_expected_groups(by, sort: bool, *, raise_if_dask=True) -> pd.Index: if is_duck_dask_array(by): - raise ValueError("Please provide expected_groups if not grouping by a numpy array.") + if raise_if_dask: + raise ValueError("Please provide expected_groups if not grouping by a numpy array.") + return None flatby = by.reshape(-1) expected = pd.unique(flatby[~isnull(flatby)]) return _convert_expected_groups_to_index((expected,), isbin=(False,), sort=sort)[0] From 70bcfd4b803594872db545db791356d26f48376e Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 16:01:17 +0200 Subject: [PATCH 08/10] Missed a if ! --- flox/core.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/flox/core.py b/flox/core.py index cc10da927..59da6ec74 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1151,13 +1151,14 @@ def dask_groupby_agg( ) else: intermediate = applied - # if is_duck_dask_array(by_input): - # expected_groups = None - # # group_chunks = ((np.nan,),) - # else: - # expected_groups = _get_expected_groups(by_input, sort=sort) - # # group_chunks = ((len(expected_groups),),) - expected_groups = _get_expected_groups(by_input, sort=sort, raise_if_dask=False) + if expected_groups is None: + if is_duck_dask_array(by_input): + expected_groups = None + # group_chunks = ((np.nan,),) + else: + expected_groups = _get_expected_groups(by_input, sort=sort) + # group_chunks = ((len(expected_groups),),) + # expected_groups = _get_expected_groups(by_input, sort=sort, raise_if_dask=False) group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) if method == "map-reduce": From 02450fbd328b6c27e6bee305a8b1680e9a90486b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 16:14:24 +0200 Subject: [PATCH 09/10] Update core.py --- flox/core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/flox/core.py b/flox/core.py index 59da6ec74..259e618c3 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1154,11 +1154,8 @@ def dask_groupby_agg( if expected_groups is None: if is_duck_dask_array(by_input): expected_groups = None - # group_chunks = ((np.nan,),) else: expected_groups = _get_expected_groups(by_input, sort=sort) - # group_chunks = ((len(expected_groups),),) - # expected_groups = _get_expected_groups(by_input, sort=sort, raise_if_dask=False) group_chunks = ((len(expected_groups),) if expected_groups is not None else (np.nan,),) if method == "map-reduce": From c425d100f8337a300447d0a44efe6e08b5b8811d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 25 Sep 2022 16:16:43 +0200 Subject: [PATCH 10/10] Update core.py --- flox/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/flox/core.py b/flox/core.py index 259e618c3..0ddf608ed 100644 --- a/flox/core.py +++ b/flox/core.py @@ -54,11 +54,9 @@ def _is_arg_reduction(func: str | Aggregation) -> bool: return False -def _get_expected_groups(by, sort: bool, *, raise_if_dask=True) -> pd.Index: +def _get_expected_groups(by, sort: bool) -> pd.Index: if is_duck_dask_array(by): - if raise_if_dask: - raise ValueError("Please provide expected_groups if not grouping by a numpy array.") - return None + raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) expected = pd.unique(flatby[~isnull(flatby)]) return _convert_expected_groups_to_index((expected,), isbin=(False,), sort=sort)[0]