
Commit 9b01c48

Illviljan, pre-commit-ci[bot], and dcherian authored
Add windows CI (#151)
* Add windows CI
* Update ci.yaml
* Update ci.yaml
* Make arg input the same as shown in pytest
* Add dtype check
* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
* have expected and actual results on the same side
* use np.intp for count expected
* [revert] minimize test
* specify dtypes
* more fixers
* more.
* Fix groupby_reduce
* [revert] only wiindows tests
* more fixes?
* more fixes.
* more fix
* Last fix?
* Update .github/workflows/ci.yaml
* revert
* Better fix
* Revert "revert". This reverts commit 3b79f6e.
* better comment.
* clean up test
* Revert "Revert "revert"". This reverts commit 38438a2.
* xfail labels dtype test
* Revert "[revert] only wiindows tests". This reverts commit 232cf15.
* Revert "[revert] minimize test". This reverts commit f993b31.
* fix bad revert

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: dcherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
1 parent e3ea0e7 · commit 9b01c48

File tree: 3 files changed (+54, -43 lines)


.github/workflows/ci.yaml

Lines changed: 2 additions & 3 deletions
@@ -24,7 +24,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-latest", "windows-latest"]
        python-version: ["3.8", "3.10"]
    steps:
      - uses: actions/checkout@v3
@@ -43,8 +43,7 @@ jobs:
            python="${{ matrix.python-version }}"
      - name: Install flox
        run: |
-          python -m pip install -e .
-          conda list
+          python -m pip install --no-deps -e .
      - name: Run Tests
        run: |
          pytest -n auto --cov=./ --cov-report=xml
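
Running the same suite on windows-latest mostly exercises dtype handling: with NumPy 1.x the default integer type is the platform C long, which is 32-bit on Windows and 64-bit on Linux/macOS, so expectations written with plain `int` disagree across the matrix. A minimal sketch of the difference (the printed dtypes assume NumPy < 2.0):

```python
import numpy as np

# With NumPy < 2.0 the default integer is the platform C long:
# 32-bit on Windows, 64-bit on Linux/macOS.
counts = np.array([1, 1, 2], dtype=int)
print(counts.dtype)  # int32 on Windows, int64 elsewhere

# Pinning the dtype keeps expectations identical on every OS.
counts64 = np.array([1, 1, 2], dtype=np.int64)
print(counts64.dtype)  # always int64
```

This is why most of the test changes below replace `dtype=int` and `dtype=float` with explicit `np.int64` / `np.float64`.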

flox/core.py

Lines changed: 4 additions & 1 deletion
@@ -1350,7 +1350,10 @@ def dask_groupby_agg(
                    aggregate=partial(aggregate, expected_groups=index, reindex=True),
                )
            )
-            groups_.append(cohort)
+            # This is done because pandas promotes to 64-bit types when an Index is created
+            # So we use the index to generate the return value for consistency with "map-reduce"
+            # This is important on windows
+            groups_.append(index.values)

        reduced = dask.array.concatenate(reduced_, axis=-1)
        groups = (np.concatenate(groups_),)
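
The new comment leans on pandas' Index construction: with pandas 1.x an integer Index is promoted to 64-bit, so `index.values` yields the same int64 groups on every platform, matching what the "map-reduce" path returns. A small sketch of the promotion being relied on (behaviour assumes pandas < 2.0; newer pandas can keep 32-bit index dtypes):

```python
import numpy as np
import pandas as pd

cohort = np.array([0, 1, 2], dtype=np.int32)
index = pd.Index(cohort)

# pandas 1.x promotes integer indexes to 64-bit, so appending
# index.values instead of the raw cohort keeps the concatenated
# groups int64 regardless of the platform default integer.
print(index.dtype)         # int64 with pandas < 2.0
print(index.values.dtype)  # int64
```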

tests/test_core.py

Lines changed: 48 additions & 39 deletions
@@ -135,11 +135,11 @@ def test_groupby_reduce(
        by = da.from_array(by, chunks=(3,) if by.ndim == 1 else (1, 3))

    if func == "mean" or func == "nanmean":
-        expected_result = np.array(expected, dtype=float)
+        expected_result = np.array(expected, dtype=np.float64)
    elif func == "sum":
        expected_result = np.array(expected, dtype=dtype)
    elif func == "count":
-        expected_result = np.array(expected, dtype=int)
+        expected_result = np.array(expected, dtype=np.int64)

    result, groups, = groupby_reduce(
        array,
@@ -149,7 +149,9 @@
        fill_value=123,
        engine=engine,
    )
-    g_dtype = by.dtype if expected_groups is None else np.asarray(expected_groups).dtype
+    # we use pd.Index(expected_groups).to_numpy() which is always int64
+    # for the values in this tests
+    g_dtype = by.dtype if expected_groups is None else np.int64

    assert_equal(groups, np.array([0, 1, 2], g_dtype))
    assert_equal(expected_result, result)
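
The comment added in this test points at the same asymmetry: converting a plain Python list of labels with `np.asarray` picks up the platform default integer, while flox goes through a pandas Index, whose values come back as int64 everywhere. A short illustration of why `g_dtype` is pinned rather than inferred (dtypes assume NumPy < 2.0 and pandas 1.x):

```python
import numpy as np
import pandas as pd

expected_groups = [0, 1, 2]

# Inferred dtype follows the platform default integer ...
print(np.asarray(expected_groups).dtype)           # int32 on Windows, int64 elsewhere

# ... but the Index-backed path used by groupby_reduce is always int64.
print(pd.Index(expected_groups).to_numpy().dtype)  # int64
```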
@@ -274,7 +276,7 @@ def test_groupby_reduce_count():
    array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
    labels = np.array(["a", "b", "b", "b", "c", "c", "c"])
    result, _ = groupby_reduce(array, labels, func="count")
-    assert_equal(result, [1, 1, 2])
+    assert_equal(result, np.array([1, 1, 2], dtype=np.int64))


def test_func_is_aggregation():
@@ -383,53 +385,52 @@ def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtyp
    kwargs["expected_groups"] = [0, 2, 1]
    with raise_if_dask_computes():
        actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False)
-    assert_equal(groups, [0, 2, 1])
+    assert_equal(groups, np.array([0, 2, 1], dtype=np.intp))
    assert_equal(expected, actual[..., [0, 2, 1]])

-    kwargs["expected_groups"] = [0, 2, 1]
    with raise_if_dask_computes():
        actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True)
-    assert_equal(groups, [0, 1, 2])
+    assert_equal(groups, np.array([0, 1, 2], np.intp))
    assert_equal(expected, actual)


def test_numpy_reduce_axis_subset(engine):
    # TODO: add NaNs
    by = labels2d
-    array = np.ones_like(by)
+    array = np.ones_like(by, dtype=np.int64)
    kwargs = dict(func="count", engine=engine, fill_value=0)
    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
-    assert_equal(result, [[2, 3], [2, 3]])
+    assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

    by = np.broadcast_to(labels2d, (3, *labels2d.shape))
    array = np.ones_like(by)
    result, _ = groupby_reduce(array, by, **kwargs, axis=1)
-    subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]])
+    subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.int64)
    expected = np.tile(subarr, (3, 1, 1))
    assert_equal(result, expected)

    result, _ = groupby_reduce(array, by, **kwargs, axis=2)
-    subarr = np.array([[2, 3], [2, 3]])
+    subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
    expected = np.tile(subarr, (3, 1, 1))
    assert_equal(result, expected)

    result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2))
-    expected = np.array([[4, 6], [4, 6], [4, 6]])
+    expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
    assert_equal(result, expected)

    result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1))
    assert_equal(result, expected)

    result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2))
-    expected = np.array([[4, 6], [4, 6], [4, 6]])
+    expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.int64)
    assert_equal(result, expected)


@requires_dask
def test_dask_reduce_axis_subset():

    by = labels2d
-    array = np.ones_like(by)
+    array = np.ones_like(by, dtype=np.int64)
    with raise_if_dask_computes():
        result, _ = groupby_reduce(
            da.from_array(array, chunks=(2, 3)),
@@ -438,11 +439,11 @@ def test_dask_reduce_axis_subset():
            axis=1,
            expected_groups=[0, 2],
        )
-    assert_equal(result, [[2, 3], [2, 3]])
+    assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.int64))

    by = np.broadcast_to(labels2d, (3, *labels2d.shape))
    array = np.ones_like(by)
-    subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]])
+    subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]], dtype=np.int64)
    expected = np.tile(subarr, (3, 1, 1))
    with raise_if_dask_computes():
        result, _ = groupby_reduce(
@@ -455,7 +456,7 @@ def test_dask_reduce_axis_subset():
        )
    assert_equal(result, expected)

-    subarr = np.array([[2, 3], [2, 3]])
+    subarr = np.array([[2, 3], [2, 3]], dtype=np.int64)
    expected = np.tile(subarr, (3, 1, 1))
    with raise_if_dask_computes():
        result, _ = groupby_reduce(
@@ -663,7 +664,7 @@ def test_groupby_bins(chunk_labels, chunks, engine, method) -> None:
        engine=engine,
        method=method,
    )
-    expected = np.array([3, 1, 0])
+    expected = np.array([3, 1, 0], dtype=np.int64)
    for left, right in zip(groups, pd.IntervalIndex.from_arrays([1, 2, 4], [2, 4, 5]).to_numpy()):
        assert left == right
    assert_equal(actual, expected)
@@ -780,15 +781,23 @@ def test_dtype_preservation(dtype, func, engine):


@requires_dask
-@pytest.mark.parametrize("method", ["split-reduce", "map-reduce", "cohorts"])
-def test_cohorts(method):
-    repeats = [4, 4, 12, 2, 3, 4]
-    labels = np.repeat(np.arange(6), repeats)
-    array = dask.array.from_array(labels, chunks=(4, 8, 4, 9, 4))
+@pytest.mark.parametrize("dtype", [np.int32, np.int64])
+@pytest.mark.parametrize(
+    "labels_dtype", [pytest.param(np.int32, marks=pytest.mark.xfail), np.int64]
+)
+@pytest.mark.parametrize("method", ["map-reduce", "cohorts"])
+def test_cohorts_map_reduce_consistent_dtypes(method, dtype, labels_dtype):
+    repeats = np.array([4, 4, 12, 2, 3, 4], dtype=np.int32)
+    labels = np.repeat(np.arange(6, dtype=labels_dtype), repeats)
+    array = dask.array.from_array(labels.astype(dtype), chunks=(4, 8, 4, 9, 4))

    actual, actual_groups = groupby_reduce(array, labels, func="count", method=method)
-    assert_equal(actual_groups, np.arange(6))
-    assert_equal(actual, repeats)
+    assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
+    assert_equal(actual, repeats.astype(np.int64))
+
+    actual, actual_groups = groupby_reduce(array, labels, func="sum", method=method)
+    assert_equal(actual_groups, np.arange(6, dtype=labels.dtype))
+    assert_equal(actual, np.array([0, 4, 24, 6, 12, 20], dtype))


@requires_dask
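
The `labels_dtype` parametrization above marks the `np.int32` case as an expected failure using `pytest.param(..., marks=pytest.mark.xfail)`, which is pytest's standard way to xfail one parameter combination without skipping the whole test. A standalone sketch of the pattern (the test body here is illustrative, not flox code):

```python
import numpy as np
import pytest


@pytest.mark.parametrize(
    "labels_dtype",
    [pytest.param(np.int32, marks=pytest.mark.xfail), np.int64],
)
def test_labels_keep_int64(labels_dtype):
    labels = np.arange(6, dtype=labels_dtype)
    # The int32 case is expected to fail until int32 labels round-trip cleanly.
    assert labels.dtype == np.int64
```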
@@ -800,7 +809,7 @@ def test_cohorts_nd_by(func, method, axis, engine):
    o2 = dask.array.ones((2, 3), chunks=-1)

    array = dask.array.block([[o, 2 * o], [3 * o2, 4 * o2]])
-    by = array.compute().astype(int)
+    by = array.compute().astype(np.int64)
    by[0, 1] = 30
    by[2, 1] = 40
    by[0, 4] = 31
@@ -825,9 +834,9 @@ def test_cohorts_nd_by(func, method, axis, engine):

    actual, groups = groupby_reduce(array, by, sort=False, **kwargs)
    if method == "map-reduce":
-        assert_equal(groups, [1, 30, 2, 31, 3, 4, 40])
+        assert_equal(groups, np.array([1, 30, 2, 31, 3, 4, 40], dtype=np.int64))
    else:
-        assert_equal(groups, [1, 30, 2, 31, 3, 40, 4])
+        assert_equal(groups, np.array([1, 30, 2, 31, 3, 40, 4], dtype=np.int64))
    reindexed = reindex_(actual, groups, pd.Index(sorted_groups))
    assert_equal(reindexed, expected)

@@ -950,7 +959,7 @@ def test_factorize_values_outside_bins():
        fastpath=True,
    )
    actual = vals[0]
-    expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]])
+    expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]], np.int64)
    assert_equal(expected, actual)


@@ -967,7 +976,7 @@ def test_multiple_groupers() -> None:
        reindex=True,
        func="count",
    )
-    expected = np.eye(5, 5, dtype=int)
+    expected = np.eye(5, 5, dtype=np.int64)
    assert_equal(expected, actual)


@@ -979,38 +988,38 @@ def test_factorize_reindex_sorting_strings():
    )

    expected = factorize_(**kwargs, reindex=True, sort=True)[0]
-    assert_equal(expected, [0, 1, 4, 2])
+    assert_equal(expected, np.array([0, 1, 4, 2], dtype=np.int64))

    expected = factorize_(**kwargs, reindex=True, sort=False)[0]
-    assert_equal(expected, [0, 3, 4, 1])
+    assert_equal(expected, np.array([0, 3, 4, 1], dtype=np.int64))

    expected = factorize_(**kwargs, reindex=False, sort=False)[0]
-    assert_equal(expected, [0, 1, 2, 3])
+    assert_equal(expected, np.array([0, 1, 2, 3], dtype=np.int64))

    expected = factorize_(**kwargs, reindex=False, sort=True)[0]
-    assert_equal(expected, [0, 1, 3, 2])
+    assert_equal(expected, np.array([0, 1, 3, 2], dtype=np.int64))


def test_factorize_reindex_sorting_ints():
    kwargs = dict(
        by=(np.array([-10, 1, 10, 2, 3, 5]),),
        axis=-1,
-        expected_groups=(np.array([0, 1, 2, 3, 4, 5]),),
+        expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),),
    )

    expected = factorize_(**kwargs, reindex=True, sort=True)[0]
-    assert_equal(expected, [6, 1, 6, 2, 3, 5])
+    assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

    expected = factorize_(**kwargs, reindex=True, sort=False)[0]
-    assert_equal(expected, [6, 1, 6, 2, 3, 5])
+    assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

    kwargs["expected_groups"] = (np.arange(5, -1, -1),)

    expected = factorize_(**kwargs, reindex=True, sort=True)[0]
-    assert_equal(expected, [6, 1, 6, 2, 3, 5])
+    assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.int64))

    expected = factorize_(**kwargs, reindex=True, sort=False)[0]
-    assert_equal(expected, [6, 4, 6, 3, 2, 0])
+    assert_equal(expected, np.array([6, 4, 6, 3, 2, 0], dtype=np.int64))

@requires_dask
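
Taken together, the changes make the results of `groupby_reduce` platform-stable: counts come back as int64 and the returned groups follow the Index-promoted label dtype. A quick usage sketch based on the calls exercised in these tests:

```python
import numpy as np
from flox.core import groupby_reduce

array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1])
labels = np.array(["a", "b", "b", "b", "c", "c", "c"])

# Count of non-NaN elements per label; with this commit the result
# dtype is int64 on Windows as well as on Linux/macOS.
result, groups = groupby_reduce(array, labels, func="count")
print(result)        # [1 1 2]
print(result.dtype)  # int64
print(groups)        # ['a' 'b' 'c']
```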
