Skip to content

benchmarks updates #273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,13 @@ jobs:
- name: Set up conda environment
uses: mamba-org/setup-micromamba@v1
with:
environment-file: ci/environment.yml
environment-name: flox-tests
environment-name: flox-bench
create-args: >-
python=3.10
asv
mamba
init-shell: bash
cache-environment: true
# create-args: |
# python="${{ matrix.python-version }}"

# - name: Setup some dependencies
# shell: bash -l {0}
# run: |
# pip install asv
# sudo apt-get update -y

- name: Run benchmarks
shell: bash -l {0}
Expand All @@ -47,14 +42,11 @@ jobs:
ASV_FACTOR: 1.5
ASV_SKIP_SLOW: 1
run: |
set -x
# set -x
# ID this runner
asv machine --yes
echo "Baseline: ${{ github.event.pull_request.base.sha }} (${{ github.event.pull_request.base.label }})"
echo "Contender: ${GITHUB_SHA} (${{ github.event.pull_request.head.label }})"
# Use mamba for env creation
# export CONDA_EXE=$(which mamba)
export CONDA_EXE=$(which conda)
# Run benchmarks for current commit against base
ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR"
asv continuous $ASV_OPTIONS ${{ github.event.pull_request.base.sha }} ${GITHUB_SHA} \
Expand Down
75 changes: 5 additions & 70 deletions asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,8 @@
// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["main"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
"dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv" or other value depending on the plugins in use.
// If missing or the empty string, the tool will be automatically
// determined by looking for tools on the PATH environment
// variable.
"environment_type": "conda",

// timeout in seconds for installing any dependencies in environment
// defaults to 10 min
"install_timeout": 600,
Expand All @@ -55,63 +42,11 @@

// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
"pythons": ["3.9"],

// The list of conda channel names to be searched for benchmark
// dependency packages in the specified order
"conda_channels": ["conda-forge", "nodefaults"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
// list or empty string indicates to just test against the default
// (latest) version. null indicates that the package is to not be
// installed. If the package to be tested is only available from
// PyPI, and the 'environment_type' is conda, then you can preface
// the package name by 'pip+', and the package will be installed via
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
//
"matrix": {
"numbagg": [""],
"numpy_groupies": [""],
"numpy": [""],
"pandas": [""],
"dask-core": [""],
"xarray": [""],
},

// Combinations of libraries/python versions can be excluded/included
// from the set to test. Each entry is a dictionary containing additional
// key-value pairs to include/exclude.
//
// An exclude entry excludes entries where all values match. The
// values are regexps that should match the whole string.
//
// An include entry adds an environment. Only the packages listed
// are installed. The 'python' key is required. The exclude rules
// do not apply to includes.
//
// In addition to package names, the following keys are available:
//
// - python
// Python version, as in the *pythons* variable above.
// - environment_type
// Environment type, as above.
// - sys_platform
// Platform, as in sys.platform. Possible values for the common
// cases: 'linux2', 'win32', 'cygwin', 'darwin'.
//
// "exclude": [
// {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
// {"environment_type": "conda", "six": null}, // don't run without six on conda
// ],
//
// "include": [
// // additional env for python2.7
// {"python": "2.7", "numpy": "1.8"},
// // additional env if run on windows+conda
// {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
// ],
// "pythons": ["3.9"],

"environment_type": "mamba",
"conda_channels": ["conda-forge"],
"conda_environment_file": "../ci/benchmark.yml",

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
Expand Down
80 changes: 40 additions & 40 deletions asv_bench/benchmarks/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@
import flox.aggregations

N = 3000
funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "var", "count", "all"]
funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"]
engines = ["flox", "numpy", "numbagg"]
expected_groups = {
"None": None,
"RangeIndex": pd.RangeIndex(5),
"bins": pd.IntervalIndex.from_breaks([1, 2, 4]),
}
expected_names = tuple(expected_groups)

NUMBAGG_FUNCS = ["nansum", "nanmean", "nanmax", "count", "all"]

numbagg_skip = [
(func, expected_names[0], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS
] + [(func, expected_names[1], "numbagg") for func in funcs if func not in NUMBAGG_FUNCS]
# Parameter combinations to skip for the "numbagg" engine: every reduction
# that is not listed in NUMBAGG_FUNCS, for *each* expected-groups variant.
# Each tuple must use the loop's own `name`; hard-coding `expected_names[0]`
# would only ever skip the first variant and merely repeat those entries.
numbagg_skip = [
    (func, name, "numbagg")
    for name in expected_names
    for func in funcs
    if func not in NUMBAGG_FUNCS
]


def setup_jit():
Expand All @@ -42,7 +42,7 @@ class ChunkReduce:
"""Time the core reduction function."""

min_run_count = 5
warmup_time = 1
warmup_time = 0.5

def setup(self, *args, **kwargs):
raise NotImplementedError
Expand All @@ -59,18 +59,6 @@ def time_reduce(self, func, expected_name, engine):
expected_groups=expected_groups[expected_name],
)

@parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
def time_reduce_bare(self, func, engine):
flox.aggregations.generic_aggregate(
self.labels,
self.array,
axis=-1,
size=5,
func=func,
engine=engine,
fill_value=0,
)

@skip_for_params(numbagg_skip)
@parameterize({"func": funcs, "expected_name": expected_names, "engine": engines})
def peakmem_reduce(self, func, expected_name, engine):
Expand All @@ -92,13 +80,18 @@ def setup(self, *args, **kwargs):
if "numbagg" in args:
setup_jit()


class ChunkReduce1DUnsorted(ChunkReduce):
def setup(self, *args, **kwargs):
self.array = np.ones((N,))
self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
self.axis = -1
setup_jit()
@parameterize({"func": ["nansum", "nanmean", "nanmax", "count"], "engine": engines})
def time_reduce_bare(self, func, engine):
# TODO: migrate to the other test cases, but we'll have to set up labels
# appropriately ;(
flox.aggregations.generic_aggregate(
self.labels,
self.array,
axis=self.axis,
func=func,
engine=engine,
fill_value=0,
)


class ChunkReduce2D(ChunkReduce):
Expand All @@ -109,14 +102,6 @@ def setup(self, *args, **kwargs):
setup_jit()


class ChunkReduce2DUnsorted(ChunkReduce):
def setup(self, *args, **kwargs):
self.array = np.ones((N, N))
self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
self.axis = -1
setup_jit()


class ChunkReduce2DAllAxes(ChunkReduce):
def setup(self, *args, **kwargs):
self.array = np.ones((N, N))
Expand All @@ -125,9 +110,24 @@ def setup(self, *args, **kwargs):
setup_jit()


class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
def setup(self, *args, **kwargs):
self.array = np.ones((N, N))
self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
self.axis = None
setup_jit()
# class ChunkReduce2DUnsorted(ChunkReduce):
# def setup(self, *args, **kwargs):
# self.array = np.ones((N, N))
# self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
# self.axis = -1
# setup_jit()

# class ChunkReduce1DUnsorted(ChunkReduce):
# def setup(self, *args, **kwargs):
# self.array = np.ones((N,))
# self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5))
# self.axis = -1
# setup_jit()


# class ChunkReduce2DAllAxesUnsorted(ChunkReduce):
# def setup(self, *args, **kwargs):
# self.array = np.ones((N, N))
# self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5))
# self.axis = None
# setup_jit()
15 changes: 15 additions & 0 deletions ci/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: flox-bench
channels:
- conda-forge
dependencies:
- asv
- cachey
- dask-core
- numpy>=1.20
- mamba
- pip
- python=3.10
- xarray
- numpy_groupies>=0.9.19
- numbagg>=0.3
- wheel