
Commit 9dd126c

benchmarks updates (#273)
* Switch to mamba
* Switch to mamba
* fix/
* Add mamba to env
* reduce
* fix?
* fix skipping
* Speedup?
* Remove custom bench env yaml
* [skip-ci] fix bench env
* [skip-ci]fix?
* [skip-ci] again
* Revert "[skip-ci] again". This reverts commit b440a4e.
* Revert "[skip-ci]fix?". This reverts commit f045a64.
* Revert "[skip-ci] fix bench env". This reverts commit 2b5add3.
* Revert "Remove custom bench env yaml". This reverts commit 518ff1a.
* add back custom bench env
* fix reduce bare with 2D arrays
* try avoiding env file again
* small cleanups
* try again
1 parent 0e1b0d8 commit 9dd126c

File tree (4 files changed, +66 -124 lines):

* .github/workflows/benchmarks.yml
* asv_bench/asv.conf.json
* asv_bench/benchmarks/reduce.py
* ci/benchmark.yml

.github/workflows/benchmarks.yml

Lines changed: 6 additions & 14 deletions
@@ -24,18 +24,13 @@ jobs:
       - name: Set up conda environment
         uses: mamba-org/setup-micromamba@v1
         with:
-          environment-file: ci/environment.yml
-          environment-name: flox-tests
+          environment-name: flox-bench
+          create-args: >-
+            python=3.10
+            asv
+            mamba
           init-shell: bash
           cache-environment: true
-          # create-args: |
-          #   python="${{ matrix.python-version }}"
-
-      # - name: Setup some dependencies
-      #   shell: bash -l {0}
-      #   run: |
-      #     pip install asv
-      #     sudo apt-get update -y
 
       - name: Run benchmarks
         shell: bash -l {0}
@@ -47,14 +42,11 @@ jobs:
           ASV_FACTOR: 1.5
           ASV_SKIP_SLOW: 1
         run: |
-          set -x
+          # set -x
           # ID this runner
           asv machine --yes
           echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
           echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
-          # Use mamba for env creation
-          # export CONDA_EXE=$(which mamba)
-          export CONDA_EXE=$(which conda)
           # Run benchmarks for current commit against base
           ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR"
           asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
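For reference, a rough local equivalent of this CI step is sketched below. It assumes asv is already installed and compares the current checkout against main; in CI the baseline and contender SHAs are injected by GitHub Actions instead.

    cd asv_bench
    asv machine --yes
    # same options the workflow builds from ASV_FACTOR
    ASV_OPTIONS="--split --show-stderr --factor 1.5"
    asv continuous $ASV_OPTIONS main HEAD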

asv_bench/asv.conf.json

Lines changed: 5 additions & 70 deletions
@@ -31,21 +31,8 @@
     // List of branches to benchmark. If not provided, defaults to "master"
     // (for git) or "default" (for mercurial).
     "branches": ["main"], // for git
-    // "branches": ["default"], // for mercurial
-
-    // The DVCS being used. If not set, it will be automatically
-    // determined from "repo" by looking at the protocol in the URL
-    // (if remote), or by looking for special directories, such as
-    // ".git" (if local).
     "dvcs": "git",
 
-    // The tool to use to create environments. May be "conda",
-    // "virtualenv" or other value depending on the plugins in use.
-    // If missing or the empty string, the tool will be automatically
-    // determined by looking for tools on the PATH environment
-    // variable.
-    "environment_type": "conda",
-
     // timeout in seconds for installing any dependencies in environment
     // defaults to 10 min
     "install_timeout": 600,
@@ -55,63 +42,11 @@
 
     // The Pythons you'd like to test against. If not provided, defaults
     // to the current version of Python used to run `asv`.
-    "pythons": ["3.9"],
-
-    // The list of conda channel names to be searched for benchmark
-    // dependency packages in the specified order
-    "conda_channels": ["conda-forge", "nodefaults"],
-
-    // The matrix of dependencies to test. Each key is the name of a
-    // package (in PyPI) and the values are version numbers. An empty
-    // list or empty string indicates to just test against the default
-    // (latest) version. null indicates that the package is to not be
-    // installed. If the package to be tested is only available from
-    // PyPi, and the 'environment_type' is conda, then you can preface
-    // the package name by 'pip+', and the package will be installed via
-    // pip (with all the conda available packages installed first,
-    // followed by the pip installed packages).
-    //
-    "matrix": {
-        "numbagg": [""],
-        "numpy_groupies": [""],
-        "numpy": [""],
-        "pandas": [""],
-        "dask-core": [""],
-        "xarray": [""],
-    },
-
-    // Combinations of libraries/python versions can be excluded/included
-    // from the set to test. Each entry is a dictionary containing additional
-    // key-value pairs to include/exclude.
-    //
-    // An exclude entry excludes entries where all values match. The
-    // values are regexps that should match the whole string.
-    //
-    // An include entry adds an environment. Only the packages listed
-    // are installed. The 'python' key is required. The exclude rules
-    // do not apply to includes.
-    //
-    // In addition to package names, the following keys are available:
-    //
-    // - python
-    //     Python version, as in the *pythons* variable above.
-    // - environment_type
-    //     Environment type, as above.
-    // - sys_platform
-    //     Platform, as in sys.platform. Possible values for the common
-    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
-    //
-    // "exclude": [
-    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
-    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
-    // ],
-    //
-    // "include": [
-    //     // additional env for python2.7
-    //     {"python": "2.7", "numpy": "1.8"},
-    //     // additional env if run on windows+conda
-    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
-    // ],
+    // "pythons": ["3.9"],
+
+    "environment_type": "mamba",
+    "conda_channels": ["conda-forge"],
+    "conda_environment_file": "../ci/benchmark.yml",
 
     // The directory (relative to the current directory) that benchmarks are
     // stored in. If not provided, defaults to "benchmarks"
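With environment_type set to "mamba" and the inline dependency matrix replaced by conda_environment_file, asv builds its benchmark environments from ../ci/benchmark.yml. One way to sanity-check that the new configuration resolves, assuming asv and mamba are available locally, is to create the environments without running any benchmarks:

    cd asv_bench
    # builds the mamba environment described by ../ci/benchmark.yml
    asv setup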

asv_bench/benchmarks/reduce.py

Lines changed: 40 additions & 40 deletions
@@ -6,20 +6,20 @@
 import flox.aggregations
 
 N = 3000
-funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "var", "count", "all"]
+funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
 engines = ["flox", "numpy", "numbagg"]
 expected_groups = {
     "None": None,
-    "RangeIndex": pd.RangeIndex(5),
     "bins": pd.IntervalIndex.from_breaks([1, 2, 4]),
 }
 expected_names = tuple(expected_groups)
 
 NUMBAGG_FUNCS = ["nansum", "nanmean", "nanmax", "count", "all"]
-
-numbagg_skip = [
-    (func, expected_names[0], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS
-] + [(func, expected_names[1], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS]
+numbagg_skip = []
+for name in expected_names:
+    numbagg_skip.extend(
+        list((func, expected_names[0], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS)
+    )
 
 
 def setup_jit():
@@ -42,7 +42,7 @@ class ChunkReduce:
     """Time the core reduction function."""
 
     min_run_count = 5
-    warmup_time = 1
+    warmup_time = 0.5
 
     def setup(self, *args, **kwargs):
         raise NotImplementedError
@@ -59,18 +59,6 @@ def time_reduce(self, func, expected_name, engine):
             expected_groups=expected_groups[expected_name],
         )
 
-    @parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
-    def time_reduce_bare(self, func, engine):
-        flox.aggregations.generic_aggregate(
-            self.labels,
-            self.array,
-            axis=-1,
-            size=5,
-            func=func,
-            engine=engine,
-            fill_value=0,
-        )
-
     @skip_for_params(numbagg_skip)
     @parameterize({"func": funcs, "expected_name": expected_names, "engine": engines})
     def peakmem_reduce(self, func, expected_name, engine):
@@ -92,13 +80,18 @@ def setup(self, *args, **kwargs):
         if "numbagg" in args:
             setup_jit()
 
-
-class ChunkReduce1DUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N,))
-        self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
-        self.axis = -1
-        setup_jit()
+    @parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
+    def time_reduce_bare(self, func, engine):
+        # TODO: migrate to the other test cases, but we'll have to setup labels
+        # appropriately ;(
+        flox.aggregations.generic_aggregate(
+            self.labels,
+            self.array,
+            axis=self.axis,
+            func=func,
+            engine=engine,
+            fill_value=0,
+        )
 
 
 class ChunkReduce2D(ChunkReduce):
@@ -109,14 +102,6 @@ def setup(self, *args, **kwargs):
         setup_jit()
 
 
-class ChunkReduce2DUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N, N))
-        self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
-        self.axis = -1
-        setup_jit()
-
-
 class ChunkReduce2DAllAxes(ChunkReduce):
     def setup(self, *args, **kwargs):
         self.array = np.ones((N, N))
@@ -125,9 +110,24 @@ def setup(self, *args, **kwargs):
         setup_jit()
 
 
-class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
-    def setup(self, *args, **kwargs):
-        self.array = np.ones((N, N))
-        self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
-        self.axis = None
-        setup_jit()
+# class ChunkReduce2DUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N, N))
+#         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
+#         self.axis = -1
+#         setup_jit()
+
+# class ChunkReduce1DUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N,))
+#         self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
+#         self.axis = -1
+#         setup_jit()
+
+
+# class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
+#     def setup(self, *args, **kwargs):
+#         self.array = np.ones((N, N))
+#         self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
+#         self.axis = None
+#         setup_jit()
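While iterating on these reduction benchmarks, one quick smoke test (a sketch, not part of this commit) is asv's dev mode, which runs the benchmarks in the existing Python environment rather than a freshly built one; the regex below simply matches the ChunkReduce cases defined in this file:

    cd asv_bench
    # run only the ChunkReduce* benchmarks once each, in the current env
    asv dev --bench "ChunkReduce"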

ci/benchmark.yml

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+name: flox-bench
+channels:
+  - conda-forge
+dependencies:
+  - asv
+  - cachey
+  - dask-core
+  - numpy>=1.20
+  - mamba
+  - pip
+  - python=3.10
+  - xarray
+  - numpy_groupies>=0.9.19
+  - numbagg>=0.3
+  - wheel
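The same file can also be used to recreate the benchmark environment by hand, outside of asv. A convenience sketch, assuming micromamba is installed (mamba env create -f works equally well):

    # create a local flox-bench environment from the new file
    micromamba create -f ci/benchmark.yml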
