@@ -135,11 +135,11 @@ def test_groupby_reduce(
135
135
by = da .from_array (by , chunks = (3 ,) if by .ndim == 1 else (1 , 3 ))
136
136
137
137
if func == "mean" or func == "nanmean" :
138
- expected_result = np .array (expected , dtype = float )
138
+ expected_result = np .array (expected , dtype = np . float64 )
139
139
elif func == "sum" :
140
140
expected_result = np .array (expected , dtype = dtype )
141
141
elif func == "count" :
142
- expected_result = np .array (expected , dtype = int )
142
+ expected_result = np .array (expected , dtype = np . int64 )
143
143
144
144
result , groups , = groupby_reduce (
145
145
array ,
@@ -149,7 +149,9 @@ def test_groupby_reduce(
149
149
fill_value = 123 ,
150
150
engine = engine ,
151
151
)
152
- g_dtype = by .dtype if expected_groups is None else np .asarray (expected_groups ).dtype
152
+ # we use pd.Index(expected_groups).to_numpy() which is always int64
153
+ # for the values in these tests
154
+ g_dtype = by .dtype if expected_groups is None else np .int64
153
155
154
156
assert_equal (groups , np .array ([0 , 1 , 2 ], g_dtype ))
155
157
assert_equal (expected_result , result )
@@ -274,7 +276,7 @@ def test_groupby_reduce_count():
274
276
array = np .array ([0 , 0 , np .nan , np .nan , np .nan , 1 , 1 ])
275
277
labels = np .array (["a" , "b" , "b" , "b" , "c" , "c" , "c" ])
276
278
result , _ = groupby_reduce (array , labels , func = "count" )
277
- assert_equal (result , [1 , 1 , 2 ])
279
+ assert_equal (result , np . array ( [1 , 1 , 2 ], dtype = np . int64 ) )
278
280
279
281
280
282
def test_func_is_aggregation ():
@@ -383,53 +385,52 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp
383
385
kwargs ["expected_groups" ] = [0 , 2 , 1 ]
384
386
with raise_if_dask_computes ():
385
387
actual , groups = groupby_reduce (array , by , engine = engine , ** kwargs , sort = False )
386
- assert_equal (groups , [0 , 2 , 1 ])
388
+ assert_equal (groups , np . array ( [0 , 2 , 1 ], dtype = np . intp ) )
387
389
assert_equal (expected , actual [..., [0 , 2 , 1 ]])
388
390
389
- kwargs ["expected_groups" ] = [0 , 2 , 1 ]
390
391
with raise_if_dask_computes ():
391
392
actual , groups = groupby_reduce (array , by , engine = engine , ** kwargs , sort = True )
392
- assert_equal (groups , [0 , 1 , 2 ])
393
+ assert_equal (groups , np . array ( [0 , 1 , 2 ], np . intp ) )
393
394
assert_equal (expected , actual )
394
395
395
396
396
397
def test_numpy_reduce_axis_subset(engine):
    """Check ``count`` reductions of numpy inputs over subsets of axes.

    The same ``labels2d`` labels are reduced along ``axis=1``, ``axis=2`` and
    both orderings of ``(1, 2)``, first as a 2D array and then broadcast to
    3D. Every expected array is built with an explicit ``np.int64`` dtype so
    the comparison does not depend on the platform's default integer width.

    Parameters
    ----------
    engine
        Forwarded unchanged to ``groupby_reduce`` as ``engine=``.
    """
    # TODO: add NaNs
    by = labels2d
    array = np.ones_like(by, dtype=np.int64)
    kwargs = dict(func="count", engine=engine, fill_value=0)
    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
    assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

    # Same labels repeated three times along a new leading axis.
    by = np.broadcast_to(labels2d, (3, *labels2d.shape))
    array = np.ones_like(by)
    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
    subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.int64)
    expected = np.tile(subarr, (3, 1, 1))
    assert_equal(result, expected)

    result, _ = groupby_reduce(array, by, **kwargs, axis=2)
    subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
    expected = np.tile(subarr, (3, 1, 1))
    assert_equal(result, expected)

    result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
    expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
    assert_equal(result, expected)

    # The order in which the reduced axes are listed must not matter.
    result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
    assert_equal(result, expected)

    # Passing a single 2D slice of the labels against the 3D array must give
    # the same counts as the fully broadcast labels.
    result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
    expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
    assert_equal(result, expected)
426
427
427
428
428
429
@requires_dask
429
430
def test_dask_reduce_axis_subset ():
430
431
431
432
by = labels2d
432
- array = np .ones_like (by )
433
+ array = np .ones_like (by , dtype = np . int64 )
433
434
with raise_if_dask_computes ():
434
435
result , _ = groupby_reduce (
435
436
da .from_array (array , chunks = (2 , 3 )),
@@ -438,11 +439,11 @@ def test_dask_reduce_axis_subset():
438
439
axis = 1 ,
439
440
expected_groups = [0 , 2 ],
440
441
)
441
- assert_equal (result , [[2 , 3 ], [2 , 3 ]])
442
+ assert_equal (result , np . array ( [[2 , 3 ], [2 , 3 ]], dtype = np . int64 ) )
442
443
443
444
by = np .broadcast_to (labels2d , (3 , * labels2d .shape ))
444
445
array = np .ones_like (by )
445
- subarr = np .array ([[1 , 1 ], [1 , 1 ], [123 , 2 ], [1 , 1 ], [1 , 1 ]])
446
+ subarr = np .array ([[1 , 1 ], [1 , 1 ], [123 , 2 ], [1 , 1 ], [1 , 1 ]], dtype = np . int64 )
446
447
expected = np .tile (subarr , (3 , 1 , 1 ))
447
448
with raise_if_dask_computes ():
448
449
result , _ = groupby_reduce (
@@ -455,7 +456,7 @@ def test_dask_reduce_axis_subset():
455
456
)
456
457
assert_equal (result , expected )
457
458
458
- subarr = np .array ([[2 , 3 ], [2 , 3 ]])
459
+ subarr = np .array ([[2 , 3 ], [2 , 3 ]], dtype = np . int64 )
459
460
expected = np .tile (subarr , (3 , 1 , 1 ))
460
461
with raise_if_dask_computes ():
461
462
result , _ = groupby_reduce (
@@ -663,7 +664,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
663
664
engine = engine ,
664
665
method = method ,
665
666
)
666
- expected = np .array ([3 , 1 , 0 ])
667
+ expected = np .array ([3 , 1 , 0 ], dtype = np . int64 )
667
668
for left , right in zip (groups , pd .IntervalIndex .from_arrays ([1 , 2 , 4 ], [2 , 4 , 5 ]).to_numpy ()):
668
669
assert left == right
669
670
assert_equal (actual , expected )
@@ -780,15 +781,23 @@ def test_dtype_preservation(dtype, func, engine):
780
781
781
782
782
783
@requires_dask
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
@pytest.mark.parametrize(
    # int32 labels are marked as an expected failure.
    "labels_dtype",
    [pytest.param(np.int32, marks=pytest.mark.xfail), np.int64],
)
@pytest.mark.parametrize("method", ["map-reduce", "cohorts"])
def test_cohorts_map_reduce_consistent_dtypes(method, dtype, labels_dtype):
    """Check result and group dtypes for the ``map-reduce`` and ``cohorts`` methods.

    A dask array whose values equal its labels is reduced with ``count`` and
    ``sum``. The returned groups must keep the dtype of ``labels``, ``count``
    must come back as ``np.int64`` and ``sum`` must keep the array's ``dtype``.

    Parameters
    ----------
    method
        Forwarded to ``groupby_reduce`` as ``method=``.
    dtype
        Integer dtype of the reduced dask array.
    labels_dtype
        Integer dtype of the label array.
    """
    repeats = np.array([4, 4, 12, 2, 3, 4], dtype=np.int32)
    labels = np.repeat(np.arange(6, dtype=labels_dtype), repeats)
    array = dask.array.from_array(labels.astype(dtype), chunks=(4, 8, 4, 9, 4))

    actual, actual_groups = groupby_reduce(array, labels, func="count", method=method)
    assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
    assert_equal(actual, repeats.astype(np.int64))

    # Each group g holds repeats[g] copies of the value g, so its sum is
    # g * repeats[g]: 0, 4, 24, 6, 12, 20.
    actual, actual_groups = groupby_reduce(array, labels, func="sum", method=method)
    assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
    assert_equal(actual, np.array([0, 4, 24, 6, 12, 20], dtype))
792
801
793
802
794
803
@requires_dask
@@ -800,7 +809,7 @@ def test_cohorts_nd_by(func, method, axis, engine):
800
809
o2 = dask .array .ones ((2 , 3 ), chunks = - 1 )
801
810
802
811
array = dask .array .block ([[o , 2 * o ], [3 * o2 , 4 * o2 ]])
803
- by = array .compute ().astype (int )
812
+ by = array .compute ().astype (np . int64 )
804
813
by [0 , 1 ] = 30
805
814
by [2 , 1 ] = 40
806
815
by [0 , 4 ] = 31
@@ -825,9 +834,9 @@ def test_cohorts_nd_by(func, method, axis, engine):
825
834
826
835
actual , groups = groupby_reduce (array , by , sort = False , ** kwargs )
827
836
if method == "map-reduce" :
828
- assert_equal (groups , [1 , 30 , 2 , 31 , 3 , 4 , 40 ])
837
+ assert_equal (groups , np . array ( [1 , 30 , 2 , 31 , 3 , 4 , 40 ], dtype = np . int64 ) )
829
838
else :
830
- assert_equal (groups , [1 , 30 , 2 , 31 , 3 , 40 , 4 ])
839
+ assert_equal (groups , np . array ( [1 , 30 , 2 , 31 , 3 , 40 , 4 ], dtype = np . int64 ) )
831
840
reindexed = reindex_ (actual , groups , pd .Index (sorted_groups ))
832
841
assert_equal (reindexed , expected )
833
842
@@ -950,7 +959,7 @@ def test_factorize_values_outside_bins():
950
959
fastpath = True ,
951
960
)
952
961
actual = vals [0 ]
953
- expected = np .array ([[- 1 , - 1 ], [- 1 , 0 ], [6 , 12 ], [18 , 24 ], [- 1 , - 1 ]])
962
+ expected = np .array ([[- 1 , - 1 ], [- 1 , 0 ], [6 , 12 ], [18 , 24 ], [- 1 , - 1 ]], np . int64 )
954
963
assert_equal (expected , actual )
955
964
956
965
@@ -967,7 +976,7 @@ def test_multiple_groupers() -> None:
967
976
reindex = True ,
968
977
func = "count" ,
969
978
)
970
- expected = np .eye (5 , 5 , dtype = int )
979
+ expected = np .eye (5 , 5 , dtype = np . int64 )
971
980
assert_equal (expected , actual )
972
981
973
982
@@ -979,38 +988,38 @@ def test_factorize_reindex_sorting_strings():
979
988
)
980
989
981
990
expected = factorize_ (** kwargs , reindex = True , sort = True )[0 ]
982
- assert_equal (expected , [0 , 1 , 4 , 2 ])
991
+ assert_equal (expected , np . array ( [0 , 1 , 4 , 2 ], dtype = np . int64 ) )
983
992
984
993
expected = factorize_ (** kwargs , reindex = True , sort = False )[0 ]
985
- assert_equal (expected , [0 , 3 , 4 , 1 ])
994
+ assert_equal (expected , np . array ( [0 , 3 , 4 , 1 ], dtype = np . int64 ) )
986
995
987
996
expected = factorize_ (** kwargs , reindex = False , sort = False )[0 ]
988
- assert_equal (expected , [0 , 1 , 2 , 3 ])
997
+ assert_equal (expected , np . array ( [0 , 1 , 2 , 3 ], dtype = np . int64 ) )
989
998
990
999
expected = factorize_ (** kwargs , reindex = False , sort = True )[0 ]
991
- assert_equal (expected , [0 , 1 , 3 , 2 ])
1000
+ assert_equal (expected , np . array ( [0 , 1 , 3 , 2 ], dtype = np . int64 ) )
992
1001
993
1002
994
1003
def test_factorize_reindex_sorting_ints():
    """Check the integer codes ``factorize_`` yields for integer labels.

    The first element of the ``factorize_`` return value is compared against
    explicit ``np.int64`` arrays for ``reindex=True`` with both ``sort``
    settings, first with ascending and then with descending
    ``expected_groups``.
    """
    kwargs = dict(
        by=(np.array([-10, 1, 10, 2, 3, 5]),),
        axis=-1,
        expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
    )

    # -10 and 10 are not among the expected groups; both are expected to get
    # code 6 (presumably the "not found" code, equal to the number of groups
    # -- confirm against factorize_).
    codes = factorize_(**kwargs, reindex=True, sort=True)[0]
    assert_equal(codes, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

    codes = factorize_(**kwargs, reindex=True, sort=False)[0]
    assert_equal(codes, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

    # Descending expected groups: 5, 4, 3, 2, 1, 0.
    kwargs["expected_groups"] = (np.arange(5, -1, -1),)

    codes = factorize_(**kwargs, reindex=True, sort=True)[0]
    assert_equal(codes, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

    # With sort=False the codes are positions in the descending group order.
    codes = factorize_(**kwargs, reindex=True, sort=False)[0]
    assert_equal(codes, np.array([6, 4, 6, 3, 2, 0], dtype=np.int64))
1014
1023
1015
1024
1016
1025
@requires_dask
0 commit comments