diff --git a/.gitignore b/.gitignore index 6cb24dd..8a120cd 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,4 @@ dmypy.json .pyre/ results +benchmarks/bm_pytorch_alexnet_inference/data/dog.jpg diff --git a/README.md b/README.md index b0b1524..a7b9ef0 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,21 @@ # python-macrobenchmarks A collection of macro benchmarks for the Python programming language + + +## usage + +```shell +# Run the default benchmarks: +python3 -m pyperformance run --manifest $PWD/benchmarks/MANIFEST +``` + +The benchmarks can still be run without pyperformance. This will produce + the old results format. + +```shell +# Run the benchmarks: +sh ./run_all.sh + +# Run the mypy benchmark using mypyc: +sh ./run_mypy.sh +``` diff --git a/benchmarks/.libs/legacyutils.py b/benchmarks/.libs/legacyutils.py new file mode 100644 index 0000000..c91e136 --- /dev/null +++ b/benchmarks/.libs/legacyutils.py @@ -0,0 +1,22 @@ +import json +import sys + + +def maybe_handle_legacy(bench_func, *args, loopsarg='loops', legacyarg=None): + if '--legacy' not in sys.argv: + return + argv = list(sys.argv[1:]) + argv.remove('--legacy') + + kwargs = {} + if legacyarg: + kwargs[legacyarg] = True + if argv: + assert loopsarg + kwargs[loopsarg] = int(argv[0]) + + _, times = bench_func(*args, **kwargs) + if len(argv) > 1: + json.dump(times, open(argv[1], 'w')) + + sys.exit(0) diff --git a/benchmarks/.libs/netutils.py b/benchmarks/.libs/netutils.py new file mode 100644 index 0000000..9bd9833 --- /dev/null +++ b/benchmarks/.libs/netutils.py @@ -0,0 +1,88 @@ +import contextlib +import ipaddress +import os.path +import socket +import subprocess +import time + + +@contextlib.contextmanager +def serving(argv, sitedir, addr, *, + pause=None, + kill=False, + quiet=True, + ): + if os.path.exists(addr): + sock = addr + addr = None + try: + os.remove(sock) + except FileNotFoundError: + pass + else: + sock = None + + p = subprocess.Popen( + argv, + cwd=sitedir, + stdout=subprocess.DEVNULL if quiet else None, + stderr=subprocess.STDOUT if quiet else None, + ) + try: + if pause: + time.sleep(pause) + if not sock: + try: + waitUntilUp(addr) + except NotImplementedError: + sock = addr + addr = None + if sock: + while not os.path.exists(sock): + time.sleep(0.001) + assert p.poll() is None, p.poll() + yield + assert p.poll() is None, p.poll() + finally: + p.terminate() + if kill: + p.kill() + p.wait() + + +def waitUntilUp(addr, timeout=10.0): + end = time.time() + timeout + addr = parse_socket_addr(addr) + started = False + current = time.time() + while not started or current <= end: + try: + with socket.create_connection(addr) as sock: + return + except ConnectionRefusedError: + time.sleep(0.001) + started = True + current = time.time() + raise Exception('Timeout reached when trying to connect') + + +def parse_socket_addr(addr, *, resolve=True): + if not isinstance(addr, str): + raise NotImplementedError(addr) + host, _, port = addr.partition(':') + + if not host: + raise NotImplementedError(addr) + try: + host = ipaddress.ip_address(host) + except ValueError: + raise NotImplementedError(addr) + host = str(host) + + if not port: + raise NotImplementedError(addr) + if not port.isdigit(): + raise NotImplementedError(addr) + port = int(port) + + return (host, port) diff --git a/benchmarks/MANIFEST b/benchmarks/MANIFEST new file mode 100644 index 0000000..7af33eb --- /dev/null +++ b/benchmarks/MANIFEST @@ -0,0 +1,19 @@ +[benchmarks] + +name metafile +aiohttp +djangocms +flaskblogging +gevent_hub +gunicorn +json +kinto 
+mypy +mypyc +pycparser +pylint +pytorch_alexnet_inference +thrift + +[group default] +-mypyc diff --git a/benchmarks/aiohttp.py b/benchmarks/aiohttp.py deleted file mode 100644 index 45f26ca..0000000 --- a/benchmarks/aiohttp.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time - -from djangocms import waitUntilUp - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - p = subprocess.Popen([exe, "gunicorn_serve.py"], stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT, cwd=os.path.join(os.path.dirname(__file__), "../data")) - try: - waitUntilUp(("127.0.0.1", 8080)) - - n = 3000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8080/blog/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p.poll() is None, p.poll() - - finally: - p.terminate() - p.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/base.toml b/benchmarks/base.toml new file mode 120000 index 0000000..1e11d78 --- /dev/null +++ b/benchmarks/base.toml @@ -0,0 +1 @@ +../pyproject.toml \ No newline at end of file diff --git a/data/gunicorn_serve.py b/benchmarks/bm_aiohttp/data/serve.py similarity index 100% rename from data/gunicorn_serve.py rename to benchmarks/bm_aiohttp/data/serve.py diff --git a/benchmarks/bm_aiohttp/legacyutils.py b/benchmarks/bm_aiohttp/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_aiohttp/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_aiohttp/netutils.py b/benchmarks/bm_aiohttp/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_aiohttp/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_aiohttp/pyproject.toml b/benchmarks/bm_aiohttp/pyproject.toml new file mode 100644 index 0000000..dbe5021 --- /dev/null +++ b/benchmarks/bm_aiohttp/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "bm_aiohttp" +dependencies = [ + "aiohttp", + "gunicorn", + "requests", + "uvloop", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/aiohttp_requirements.txt b/benchmarks/bm_aiohttp/requirements.txt similarity index 100% rename from benchmarks/aiohttp_requirements.txt rename to benchmarks/bm_aiohttp/requirements.txt diff --git a/benchmarks/bm_aiohttp/run_benchmark.py b/benchmarks/bm_aiohttp/run_benchmark.py new file mode 100644 index 0000000..bf3ecf5 --- /dev/null +++ b/benchmarks/bm_aiohttp/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path +import requests +import sys + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +ARGV = [sys.executable, "serve.py"] + + +############################# +# benchmarks + +def bench_aiohttp_requests(loops=3000): + elapsed, _ = _bench_aiohttp_requests(loops) + return elapsed + + +def _bench_aiohttp_requests(loops=3000, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. 
The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV, DATADIR, "127.0.0.1:8080"): + requests_get = requests.get + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. + t0 = pyperf.perf_counter() + requests_get("http://localhost:8080/blog/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_aiohttp_requests, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of aiohttp" + runner.bench_time_func("aiohttp", bench_aiohttp_requests) diff --git a/benchmarks/bm_djangocms/legacyutils.py b/benchmarks/bm_djangocms/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_djangocms/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_djangocms/netutils.py b/benchmarks/bm_djangocms/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_djangocms/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_djangocms/pyproject.toml b/benchmarks/bm_djangocms/pyproject.toml new file mode 100644 index 0000000..84a16e4 --- /dev/null +++ b/benchmarks/bm_djangocms/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "bm_djangocms" +dependencies = [ + "Django", + "django-cms", + "djangocms-bootstrap4", + "djangocms-file", + "djangocms-googlemap", + "djangocms-installer", + "djangocms-snippet", + "djangocms-style", + "djangocms-video", + "requests", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/djangocms_requirements.txt b/benchmarks/bm_djangocms/requirements.txt similarity index 100% rename from benchmarks/djangocms_requirements.txt rename to benchmarks/bm_djangocms/requirements.txt diff --git a/benchmarks/bm_djangocms/run_benchmark.py b/benchmarks/bm_djangocms/run_benchmark.py new file mode 100644 index 0000000..e02fa60 --- /dev/null +++ b/benchmarks/bm_djangocms/run_benchmark.py @@ -0,0 +1,250 @@ +""" +Django-cms test +Sets up a djangocms installation, and hits '/' a number of times. +'/' is not super interesting, but it still exercises a little bit of +functionality; looking at cms/templates/cms/welcome.html, it seems +to do a decent amount of template logic, as well as do some basic +user auth. +We could probably improve the flow though, perhaps by logging in +and browsing around. +""" + +import contextlib +import os +import os.path +import requests +import shutil +import subprocess +import sys +import tempfile + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +PID_FILE = os.path.join(DATADIR, "setup.pid") +# It might be interesting to put the temporary directory in /dev/shm, +# which makes the initial db migration about 20% faster. 
+TEMP_DIR = None +TEMP_PREFIX = "djangocms_bench_" + +INNER_LOOPS = 800 + +# site +SITE_NAME = "testsite" +SETTINGS = """ +from django.db.backends.signals import connection_created +def set_no_sychronous(sender, connection, **kwargs): + if connection.vendor == 'sqlite': + cursor = connection.cursor() + cursor.execute('PRAGMA synchronous = OFF;') + +connection_created.connect(set_no_sychronous) +""" + +# django commands +DJANGOCMS = os.path.join( + os.path.dirname(sys.executable), + "djangocms", +) +ARGV_CREATE = [DJANGOCMS, SITE_NAME, "--verbose", "--no-sync"] +ARGV_MIGRATE = [sys.executable, "manage.py", "migrate"] +ARGV_SERVE = [sys.executable, "manage.py", "runserver", "--noreload"] + + +def setup(rootdir): + """ + Set up a djangocms installation. + Runs the initial bootstrapping without the db migration, + so that we can turn off sqlite synchronous and avoid fs time. + Rough testing shows that setting synchronous=OFF is basically + the same performance as running on /dev/shm. + """ + sitedir = os.path.join(rootdir, SITE_NAME) # This is where Django puts it. + + # Delete the site dir if it already exists. + if os.path.exists(sitedir): + shutil.rmtree(datadir, ignore_errors=False) + + # First, create the site. + subprocess.check_call(ARGV_CREATE, cwd=rootdir) + + # Add customizations. + settingsfile = os.path.join(sitedir, SITE_NAME, "settings.py") + with open(settingsfile, "a") as f: + f.write(SETTINGS) + + # Finalize the site. + t0 = pyperf.perf_counter() + subprocess.check_call(ARGV_MIGRATE, cwd=sitedir) + elapsed = pyperf.perf_counter() - t0 + + return sitedir, elapsed + + +# This is a generic util that might make sense to put in a separate lib. +def _ensure_python_on_PATH(python=sys.executable): + PATH = os.environ["PATH"].split(os.pathsep) + PATH.insert(0, os.path.dirname(python)) + os.environ["PATH"] = os.pathsep.join(PATH) + + +@contextlib.contextmanager +def _ensure_datadir(datadir, preserve=True): + if datadir: + try: + os.makedirs(datadir) + except FileExistsError: + if preserve is None: + preserve = True + elif not preserve: + raise NotImplementedError(datadir) + else: + datadir = tempfile.mkdtemp(prefix=TEMP_PREFIX, dir=TEMP_DIR) + + try: + yield datadir + finally: + if not preserve: + shutil.rmtree(datadir, ignore_errors=True) + + +############################# +# benchmarks + +def bench_djangocms_requests(sitedir, loops=INNER_LOOPS): + elapsed, _ = _bench_djangocms_requests(loops) + return elapsed + + +def _bench_djangocms_requests(sitedir, loops=INNER_LOOPS, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV_SERVE, sitedir, "127.0.0.1:8000"): + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. 
+ t0 = pyperf.perf_counter() + requests.get("http://localhost:8000/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +# We can't set "add_cmdline_args" on pyperf.Runner +# once we've created one. We work around this with a subclass. + +class _Runner(pyperf.Runner): + datadir = None + + def __init__(self): + def add_worker_args(cmd, _): + assert self.datadir + cmd.extend([ + '--serve', self.datadir, + ]) + super().__init__( + add_cmdline_args=add_worker_args, + ) + + +############################# +# the script + +if __name__ == "__main__": + """ + Usage: + python benchmarks/bm_djangocms/run_benchmark.py + python benchmarks/bm_djangocms/run_benchmark.py --setup DIR + python benchmarks/bm_djangocms/run_benchmark.py --serve DIR + + The first form creates a temporary directory, sets up djangocms in it, + serves out of it, and removes the directory. + The second form sets up a djangocms installation in the given directory. + The third form runs the benchmark out of an already-set-up directory + The second and third forms are useful if you want to benchmark the + initial migration phase separately from the second serving phase. + """ + runner = _Runner() + runner.metadata['description'] = "Test the performance of a Django data migration" + + # Parse the CLI args. + runner.argparser.add_argument("--legacy", action='store_true') + group = runner.argparser.add_mutually_exclusive_group() + group.add_argument("--setup") + group.add_argument("--serve") + args = runner.argparser.parse_args() + + if args.setup is not None: + args.datadir = args.setup + args.setup = True + args.serve = False + elif args.serve is not None: + args.datadir = args.serve + args.setup = False + args.serve = True + if not os.path.exists(args.datadir): + cmd = f"{sys.executable} {sys.argv[0]} --setup {args.datadir}?" + sys.exit(f"ERROR: Did you forget to run {cmd}?") + else: + args.datadir = None + args.setup = True + args.serve = True + + # DjangoCMS looks for Python on $PATH? + _ensure_python_on_PATH() + + # Get everything ready and then perform the requested operations. + preserve = True if args.setup and not args.serve else None + with _ensure_datadir(args.datadir, preserve) as datadir: + # First, set up the site. + if args.setup: + sitedir, elapsed = setup(datadir) + if args.legacy: + print("%.2fs to initialize db" % (elapsed,)) + print(f"site created in {sitedir}") + if not args.serve: + print(f"now run {sys.executable} {sys.argv[0]} --serve {datadir}") + else: + # This is what a previous call to setup() would have returned. + sitedir = os.path.join(datadir, SITE_NAME) + + # Then run the benchmark. 
+ if args.serve: + if args.legacy: + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_djangocms_requests, sitedir, legacyarg='legacy') + sys.exit(0) + + runner.datadir = datadir + + def time_func(loops, *args): + return bench_djangocms_requests(*args, loops=loops) + runner.bench_time_func("djangocms", time_func, sitedir, + inner_loops=INNER_LOOPS) diff --git a/data/flaskblogging_serve.py b/benchmarks/bm_flaskblogging/data/serve.py similarity index 100% rename from data/flaskblogging_serve.py rename to benchmarks/bm_flaskblogging/data/serve.py diff --git a/benchmarks/bm_flaskblogging/legacyutils.py b/benchmarks/bm_flaskblogging/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_flaskblogging/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_flaskblogging/netutils.py b/benchmarks/bm_flaskblogging/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_flaskblogging/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_flaskblogging/pyproject.toml b/benchmarks/bm_flaskblogging/pyproject.toml new file mode 100644 index 0000000..f07e1e4 --- /dev/null +++ b/benchmarks/bm_flaskblogging/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "bm_flaskblogging" +dependencies = [ + "Flask", + "Flask-Blogging", + "requests", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/flaskblogging_requirements.txt b/benchmarks/bm_flaskblogging/requirements.txt similarity index 100% rename from benchmarks/flaskblogging_requirements.txt rename to benchmarks/bm_flaskblogging/requirements.txt diff --git a/benchmarks/bm_flaskblogging/run_benchmark.py b/benchmarks/bm_flaskblogging/run_benchmark.py new file mode 100644 index 0000000..45089e0 --- /dev/null +++ b/benchmarks/bm_flaskblogging/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path +import requests +import sys + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +ARGV = [sys.executable, "serve.py"] + + +############################# +# benchmarks + +def bench_flask_requests(loops=1800): + elapsed, _ = _bench_flask_requests(loops) + return elapsed + + +def _bench_flask_requests(loops=1800, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV, DATADIR, "127.0.0.1:8000"): + requests_get = requests.get + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. 
+ t0 = pyperf.perf_counter() + requests_get("http://localhost:8000/blog/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_flask_requests, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of flask" + runner.bench_time_func("flaskblogging", bench_flask_requests) diff --git a/benchmarks/bm_gevent_hub/bm_gevent_cancel_wait.toml b/benchmarks/bm_gevent_hub/bm_gevent_cancel_wait.toml new file mode 100644 index 0000000..40bbf1e --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_cancel_wait.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_cancel_wait" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_cancel_wait"] diff --git a/benchmarks/bm_gevent_hub/bm_gevent_switch.toml b/benchmarks/bm_gevent_hub/bm_gevent_switch.toml new file mode 100644 index 0000000..3f96f66 --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_switch.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_switch" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_switch"] diff --git a/benchmarks/bm_gevent_hub/bm_gevent_wait_func_ready.toml b/benchmarks/bm_gevent_hub/bm_gevent_wait_func_ready.toml new file mode 100644 index 0000000..11bc9c6 --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_wait_func_ready.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_wait_func_ready" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_wait_func_ready"] diff --git a/benchmarks/bm_gevent_hub/bm_gevent_wait_ready.toml b/benchmarks/bm_gevent_hub/bm_gevent_wait_ready.toml new file mode 100644 index 0000000..6e673c7 --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_wait_ready.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_wait_ready" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_wait_ready"] diff --git a/benchmarks/bm_gevent_hub/pyproject.toml b/benchmarks/bm_gevent_hub/pyproject.toml new file mode 100644 index 0000000..b2eb678 --- /dev/null +++ b/benchmarks/bm_gevent_hub/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_hub" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/gevent_bench_hub_requirements.txt b/benchmarks/bm_gevent_hub/requirements.txt similarity index 100% rename from benchmarks/gevent_bench_hub_requirements.txt rename to benchmarks/bm_gevent_hub/requirements.txt diff --git a/benchmarks/bm_gevent_hub/run_benchmark.py b/benchmarks/bm_gevent_hub/run_benchmark.py new file mode 100644 index 0000000..4479492 --- /dev/null +++ b/benchmarks/bm_gevent_hub/run_benchmark.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +""" +Benchmarks for hub primitive operations. 
+ +Taken from https://github.com/gevent/gevent/blob/master/benchmarks/bench_hub.py +Modified to remove perf and not need any command line arguments +""" +import contextlib + +import pyperf +import gevent +import gevent.hub +from greenlet import greenlet +from greenlet import getcurrent + + +@contextlib.contextmanager +def active_hub(hub=None): + if hub is None: + hub = gevent.get_hub() + try: + yield hub + finally: + # Destroy the loop so we don't keep building up state (e.g. callbacks). + hub.destroy(True) + + +class SwitchingParent(gevent.hub.Hub): + """A gevent hub greenlet that switches back and forth with its child.""" + + def __init__(self, nswitches): + super().__init__(None, None) + self.nswitches = nswitches + self.child = greenlet(self._run_child, self) + + def _run_child(self): + # Back to the hub, which in turn goes + # back to the main greenlet + switch = getcurrent().parent.switch + for _ in range(self.nswitches): + switch() + + def run(self): + # Return to the main greenlet. + switch = self.parent.switch + for _ in range(self.nswitches): + switch() + + +class NoopWatcher: + def start(self, cb, obj): + # Immediately switch back to the waiter, mark as ready + cb(obj) + + def stop(self): + pass + + +class ActiveWatcher: + active = True + callback = object() + + def close(self): + pass + + +class NoopWatchTarget(object): + def rawlink(self, cb): + cb(self) + + +############################# +# benchmarks + +def bench_switch(loops=1000): + """Measure switching between a greenlet and the gevent hub N^2 times.""" + hub = SwitchingParent(loops) + child = hub.child + + with active_hub(hub): + elapsed = 0 + child_switch = child.switch + for _ in range(loops): + t0 = pyperf.perf_counter() + child_switch() + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + return elapsed + + +def bench_wait_ready(loops=1000): + """Measure waiting for a "noop" watcher to become ready N times.""" + watcher = NoopWatcher() + + with active_hub() as hub: + elapsed = 0 + hub_wait = hub.wait + for _ in range(loops): + t0 = pyperf.perf_counter() + hub_wait(watcher) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + return elapsed + + +def bench_cancel_wait(loops=1000): + """Measure canceling N watchers. + + Note that it is the same watcher N times and that it is a fake + that pretends to already be started. + """ + watcher = ActiveWatcher() + + with active_hub() as hub: + t0 = pyperf.perf_counter() + + # Cancel the fake wait requests. + for _ in range(loops): + # Schedule all the callbacks. + hub.cancel_wait(watcher, None, True) + + # Wait for all the watchers to be closed. + # TODO Start timing here? 
+ for cb in hub.loop._callbacks: + if cb.callback: + cb.callback(*cb.args) + cb.stop() # so the real loop won't do it + + return pyperf.perf_counter() - t0 + + +def bench_wait_func_ready(loops=1000): + """Measure waiting for N noop watch targets to become ready.""" + watched_objects = [NoopWatchTarget() for _ in range(loops)] + + t0 = pyperf.perf_counter() + gevent.hub.wait(watched_objects) + return pyperf.perf_counter() - t0 + + +BENCHMARKS = { + "gevent_hub": bench_switch, + "gevent_wait_func_ready": bench_wait_func_ready, + "gevent_wait_ready": bench_wait_ready, + "gevent_cancel_wait": bench_cancel_wait, + "gevent_switch": bench_switch, +} + + +############################# +# the script + +if __name__ == "__main__": + import sys + if '--legacy' in sys.argv: + for i in range(10000): + bench_switch() + sys.exit(0) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of gevent" + runner.argparser.add_argument("--legacy", action='store_true') + runner.argparser.add_argument("benchmark", nargs="?", + choices=sorted(BENCHMARKS), + default="gevent_hub") + + args = runner.parse_args() + name = args.benchmark + bench = BENCHMARKS[name] + assert(bench.__code__.co_varnames[0] == 'loops') + inner_loops = bench.__defaults__[0] + + runner.bench_time_func(name, bench, inner_loops=inner_loops) diff --git a/benchmarks/bm_gunicorn/data/serve_aiohttp.py b/benchmarks/bm_gunicorn/data/serve_aiohttp.py new file mode 100644 index 0000000..f87888a --- /dev/null +++ b/benchmarks/bm_gunicorn/data/serve_aiohttp.py @@ -0,0 +1,12 @@ +from aiohttp import web + +async def hello(request): + return web.Response(text="Hello, world") + +async def main(): + app = web.Application() + app.add_routes([web.get('/', hello)]) + return app + +if __name__ == "__main__": + web.run_app(main()) diff --git a/benchmarks/bm_gunicorn/legacyutils.py b/benchmarks/bm_gunicorn/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_gunicorn/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_gunicorn/netutils.py b/benchmarks/bm_gunicorn/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_gunicorn/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_gunicorn/pyproject.toml b/benchmarks/bm_gunicorn/pyproject.toml new file mode 100644 index 0000000..0f3d550 --- /dev/null +++ b/benchmarks/bm_gunicorn/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "bm_gunicorn" +dependencies = [ + "gunicorn", + "requests", + "uvloop", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." 
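
The bm_gunicorn benchmark that follows drives its server through the shared `netutils.serving()` helper (benchmarks/.libs/netutils.py, symlinked into each bm_* directory), just like the aiohttp, djangocms and flaskblogging scripts above. A minimal sketch of that contract, using a placeholder server command and data directory rather than any real benchmark's values:

```python
import sys

import requests

import netutils  # symlinked from benchmarks/.libs/netutils.py into each bm_* dir

# Placeholder command and data directory, for illustration only.
ARGV = [sys.executable, "serve.py"]
DATADIR = "/path/to/bm_example/data"

# serving() launches the server as a subprocess in DATADIR, waits until the
# "host:port" address accepts connections (or, for a unix-socket path, until
# the socket file shows up), yields while requests are made, and terminates
# the server afterwards. kill=True and pause=... are available for servers
# that need them (the kinto benchmark later in this diff uses both).
with netutils.serving(ARGV, DATADIR, "127.0.0.1:8000"):
    requests.get("http://127.0.0.1:8000/").text  # .text forces the body to be read
```
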
diff --git a/benchmarks/gunicorn_requirements.txt b/benchmarks/bm_gunicorn/requirements.txt similarity index 100% rename from benchmarks/gunicorn_requirements.txt rename to benchmarks/bm_gunicorn/requirements.txt diff --git a/benchmarks/bm_gunicorn/run_benchmark.py b/benchmarks/bm_gunicorn/run_benchmark.py new file mode 100644 index 0000000..41934f7 --- /dev/null +++ b/benchmarks/bm_gunicorn/run_benchmark.py @@ -0,0 +1,80 @@ +import os.path +import requests +import sys + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +GUNICORN = os.path.join( + os.path.dirname(sys.executable), + "gunicorn", +) +ADDR = "127.0.0.1:8000" +ARGV = [ + GUNICORN, "serve_aiohttp:main", + "--bind", ADDR, + "-w", "1", + "--worker-class", "aiohttp.GunicornWebWorker", +] + + +############################# +# benchmarks + +def bench_gunicorn(loops=3000): + elapsed, _ = _bench_gunicorn(loops) + return elapsed + + +def _bench_gunicorn(loops=3000, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV, DATADIR, ADDR): + requests_get = requests.get + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. + t0 = pyperf.perf_counter() + requests_get("http://localhost:8000/blog/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_gunicorn, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of gunicorn" + runner.bench_time_func("gunicorn", bench_gunicorn) diff --git a/data/reddit_comments.json b/benchmarks/bm_json/data/reddit_comments.json similarity index 100% rename from data/reddit_comments.json rename to benchmarks/bm_json/data/reddit_comments.json diff --git a/benchmarks/bm_json/legacyutils.py b/benchmarks/bm_json/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_json/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_json/pyproject.toml b/benchmarks/bm_json/pyproject.toml new file mode 100644 index 0000000..f8bf3fc --- /dev/null +++ b/benchmarks/bm_json/pyproject.toml @@ -0,0 +1,6 @@ +[project] +name = "bm_json" +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." 
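
The bm_json script that follows hands its measured loop to pyperf through `Runner.bench_time_func()`, the same entry point every benchmark on this branch uses. The docstrings above spell out why: only the request/parse loop should be timed, so the callable reports its own elapsed time instead of letting pyperf time the whole call. A minimal sketch of that contract, where `do_one_iteration()` is a stand-in rather than anything in this repo:

```python
import pyperf


def do_one_iteration():
    """Stand-in for the real unit of work (an HTTP request, json.loads(), ...)."""


def time_func(loops):
    # pyperf passes the outer-loop count; only the work itself is timed,
    # and the total elapsed time is returned to pyperf.
    elapsed = 0.0
    for _ in range(loops):
        t0 = pyperf.perf_counter()
        do_one_iteration()
        elapsed += pyperf.perf_counter() - t0
    return elapsed


if __name__ == "__main__":
    runner = pyperf.Runner()
    # bench_time_func() trusts the returned total, so setup and teardown stay
    # out of the measurement; bench_func() would time the whole call instead.
    runner.bench_time_func("example", time_func)
```
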
diff --git a/benchmarks/json_bench_requirements.txt b/benchmarks/bm_json/requirements.txt similarity index 100% rename from benchmarks/json_bench_requirements.txt rename to benchmarks/bm_json/requirements.txt diff --git a/benchmarks/bm_json/run_benchmark.py b/benchmarks/bm_json/run_benchmark.py new file mode 100644 index 0000000..201a0a1 --- /dev/null +++ b/benchmarks/bm_json/run_benchmark.py @@ -0,0 +1,68 @@ +import json +import os.path + +import pyperf + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +TARGET = os.path.join(DATADIR, "reddit_comments.json") + + +############################# +# benchmarks + +def bench_json_loads(loops=400): + elapsed, _ = _bench_json_loads(loops) + return elapsed + + +def _bench_json_loads(loops=400): + """Measure running json.loads() N times. + + The target data is nearly 1100 JSON objects, each on a single line, + from a file. The objects: + + * are all flat (no compound values) + * vary a little in number of properties, though none are big + * have a mix of values, both of type and size + + Only the json.loads() calls are measured. The following are not: + + * reading the text from the file + * looping through the lines + """ + with open(TARGET) as f: + s = f.read() + lines = s.splitlines() + + elapsed = 0 + times = [] + for _ in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long json.loads() takes. + t0 = pyperf.perf_counter() + for text in lines: + if not text: + continue + json.loads(text) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_json_loads) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of json" + runner.bench_time_func("json", bench_json_loads) diff --git a/data/kinto_project/.coveragerc b/benchmarks/bm_kinto/data/.coveragerc similarity index 100% rename from data/kinto_project/.coveragerc rename to benchmarks/bm_kinto/data/.coveragerc diff --git a/data/kinto_project/CHANGES.txt b/benchmarks/bm_kinto/data/CHANGES.txt similarity index 100% rename from data/kinto_project/CHANGES.txt rename to benchmarks/bm_kinto/data/CHANGES.txt diff --git a/data/kinto_project/MANIFEST.in b/benchmarks/bm_kinto/data/MANIFEST.in similarity index 100% rename from data/kinto_project/MANIFEST.in rename to benchmarks/bm_kinto/data/MANIFEST.in diff --git a/data/kinto_project/README.txt b/benchmarks/bm_kinto/data/README.txt similarity index 100% rename from data/kinto_project/README.txt rename to benchmarks/bm_kinto/data/README.txt diff --git a/data/kinto_project/app.wsgi b/benchmarks/bm_kinto/data/app.wsgi similarity index 100% rename from data/kinto_project/app.wsgi rename to benchmarks/bm_kinto/data/app.wsgi diff --git a/data/kinto_project/config/kinto.ini b/benchmarks/bm_kinto/data/config/kinto.ini similarity index 100% rename from data/kinto_project/config/kinto.ini rename to benchmarks/bm_kinto/data/config/kinto.ini diff --git a/data/kinto_project/development.ini b/benchmarks/bm_kinto/data/development.ini similarity index 100% rename from data/kinto_project/development.ini rename to benchmarks/bm_kinto/data/development.ini diff --git a/data/kinto_project/kinto_project/__init__.py b/benchmarks/bm_kinto/data/kinto_project/__init__.py similarity index 100% rename from 
data/kinto_project/kinto_project/__init__.py rename to benchmarks/bm_kinto/data/kinto_project/__init__.py diff --git a/data/kinto_project/kinto_project/static/pyramid-16x16.png b/benchmarks/bm_kinto/data/kinto_project/static/pyramid-16x16.png similarity index 100% rename from data/kinto_project/kinto_project/static/pyramid-16x16.png rename to benchmarks/bm_kinto/data/kinto_project/static/pyramid-16x16.png diff --git a/data/kinto_project/kinto_project/static/pyramid.png b/benchmarks/bm_kinto/data/kinto_project/static/pyramid.png similarity index 100% rename from data/kinto_project/kinto_project/static/pyramid.png rename to benchmarks/bm_kinto/data/kinto_project/static/pyramid.png diff --git a/data/kinto_project/kinto_project/static/theme.css b/benchmarks/bm_kinto/data/kinto_project/static/theme.css similarity index 100% rename from data/kinto_project/kinto_project/static/theme.css rename to benchmarks/bm_kinto/data/kinto_project/static/theme.css diff --git a/data/kinto_project/kinto_project/templates/layout.jinja2 b/benchmarks/bm_kinto/data/kinto_project/templates/layout.jinja2 similarity index 100% rename from data/kinto_project/kinto_project/templates/layout.jinja2 rename to benchmarks/bm_kinto/data/kinto_project/templates/layout.jinja2 diff --git a/data/kinto_project/kinto_project/templates/mytemplate.jinja2 b/benchmarks/bm_kinto/data/kinto_project/templates/mytemplate.jinja2 similarity index 100% rename from data/kinto_project/kinto_project/templates/mytemplate.jinja2 rename to benchmarks/bm_kinto/data/kinto_project/templates/mytemplate.jinja2 diff --git a/data/kinto_project/kinto_project/tests.py b/benchmarks/bm_kinto/data/kinto_project/tests.py similarity index 100% rename from data/kinto_project/kinto_project/tests.py rename to benchmarks/bm_kinto/data/kinto_project/tests.py diff --git a/data/kinto_project/kinto_project/views.py b/benchmarks/bm_kinto/data/kinto_project/views.py similarity index 100% rename from data/kinto_project/kinto_project/views.py rename to benchmarks/bm_kinto/data/kinto_project/views.py diff --git a/data/kinto_project/nginx.conf b/benchmarks/bm_kinto/data/nginx.conf similarity index 100% rename from data/kinto_project/nginx.conf rename to benchmarks/bm_kinto/data/nginx.conf diff --git a/data/kinto_project/production.ini b/benchmarks/bm_kinto/data/production.ini similarity index 100% rename from data/kinto_project/production.ini rename to benchmarks/bm_kinto/data/production.ini diff --git a/data/kinto_project/pytest.ini b/benchmarks/bm_kinto/data/pytest.ini similarity index 100% rename from data/kinto_project/pytest.ini rename to benchmarks/bm_kinto/data/pytest.ini diff --git a/data/kinto_project/setup.py b/benchmarks/bm_kinto/data/setup.py similarity index 100% rename from data/kinto_project/setup.py rename to benchmarks/bm_kinto/data/setup.py diff --git a/data/kinto_project/uwsgi_params b/benchmarks/bm_kinto/data/uwsgi_params similarity index 100% rename from data/kinto_project/uwsgi_params rename to benchmarks/bm_kinto/data/uwsgi_params diff --git a/benchmarks/bm_kinto/legacyutils.py b/benchmarks/bm_kinto/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_kinto/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_kinto/netutils.py b/benchmarks/bm_kinto/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_kinto/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git 
a/benchmarks/bm_kinto/pyproject.toml b/benchmarks/bm_kinto/pyproject.toml new file mode 100644 index 0000000..72cdb08 --- /dev/null +++ b/benchmarks/bm_kinto/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "bm_kinto" +dependencies = [ + "kinto", + "uWSGI", + "pyramind", + #"pyramid_jinja2", + #"pyramid_debugtoolbar", + "waitress", + "requests", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/kinto_bench_requirements.txt b/benchmarks/bm_kinto/requirements.txt similarity index 100% rename from benchmarks/kinto_bench_requirements.txt rename to benchmarks/bm_kinto/requirements.txt diff --git a/benchmarks/bm_kinto/run_benchmark.py b/benchmarks/bm_kinto/run_benchmark.py new file mode 100644 index 0000000..97122d3 --- /dev/null +++ b/benchmarks/bm_kinto/run_benchmark.py @@ -0,0 +1,90 @@ +import os +import os.path +import requests +import shutil +import subprocess +import sys +import urllib + +import pyperf +import netutils + + +PYTHON = os.path.abspath(sys.executable) +UWSGI = os.path.join(os.path.dirname(PYTHON), "uwsgi") +NGINX = shutil.which("nginx") + +SOCK = "/tmp/kinto.sock" +ADDR = "127.0.0.1:8000" + +DATADIR = os.path.join( + os.path.abspath(os.path.dirname(__file__)), + "data", +) +SETUP_PY = os.path.join(DATADIR, "setup.py") +PRODUCTION_INI = os.path.join(DATADIR, "production.ini") +NGINX_CONF = os.path.join(DATADIR, "nginx.conf") + + +############################# +# benchmarks + +def bench_kinto(loops=5000): + elapsed, _ = _bench_kinto(loops) + return elapsed + + +def _bench_kinto(loops=5000, legacy=False): + cmd = [PYTHON, SETUP_PY, "develop"] + proc = subprocess.run( + cmd, + cwd=DATADIR, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + ) + if proc.returncode != 0: + print(f'# running: {" ".join(cmd)} (in {DATADIR})') + subprocess.run(cmd, cwd=DATADIR, check=True) + + cmd_app = [UWSGI, PRODUCTION_INI] + with netutils.serving(cmd_app, DATADIR, SOCK, kill=True): + cmd_web = [NGINX, "-c", NGINX_CONF, "-p", DATADIR] + with netutils.serving(cmd_web, DATADIR, ADDR, pause=0.010, quiet=False): + if legacy: + print(requests.get("http://localhost:8000/v1").text) + # print(requests.put("http://localhost:8000/v1/accounts/testuser", json={"data": {"password": "password1"}}).text) + + start = pyperf.perf_counter() + elapsed = 0 + times = [] + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. 
+ t0 = pyperf.perf_counter() + # requests.get("http://localhost:8000/v1/").text + urllib.request.urlopen("http://localhost:8000/v1/").read() + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_kinto, legacyarg='legacy') + + if NGINX is None: + raise Exception("nginx is not installed") + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of kinto" + runner.bench_time_func("kinto", bench_kinto) diff --git a/benchmarks/bm_mypy/bm_mypyc.toml b/benchmarks/bm_mypy/bm_mypyc.toml new file mode 100644 index 0000000..ca894a6 --- /dev/null +++ b/benchmarks/bm_mypy/bm_mypyc.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_mypyc" +dependencies = [ + "mypy", +] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["--loops", "50"] diff --git a/data/mypy_target.py b/benchmarks/bm_mypy/data/mypy_target.py similarity index 100% rename from data/mypy_target.py rename to benchmarks/bm_mypy/data/mypy_target.py diff --git a/benchmarks/bm_mypy/legacyutils.py b/benchmarks/bm_mypy/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_mypy/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_mypy/pyproject.toml b/benchmarks/bm_mypy/pyproject.toml new file mode 100644 index 0000000..5da0cd8 --- /dev/null +++ b/benchmarks/bm_mypy/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_mypy" +dependencies = [ + "mypy", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/mypy_bench_requirements.txt b/benchmarks/bm_mypy/requirements.txt similarity index 100% rename from benchmarks/mypy_bench_requirements.txt rename to benchmarks/bm_mypy/requirements.txt diff --git a/benchmarks/bm_mypy/run_benchmark.py b/benchmarks/bm_mypy/run_benchmark.py new file mode 100644 index 0000000..67e8833 --- /dev/null +++ b/benchmarks/bm_mypy/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path + +import pyperf +from mypy.main import main + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +""" +I tested it, and it looks like we get the same performance conclusions +when we run on the same file multiple times as if we run on a set of files once. + +So for convenience run on a single file multiple times. +""" +TARGETS = [ + os.path.join(DATADIR, "mypy_target.py"), +] + + +############################# +# benchmarks + +def bench_mypy(loops=20): + elapsed, _ = _bench_mypy(loops) + return elapsed + + +def _bench_mypy(loops=20, *, legacy=False): + """Meansure running mypy on a file N times. + + The target file is large (over 2300 lines) with extensive use + of type hints. + + Note that mypy's main() is called directly, which means + the measurement includes the time it takes to read the file + from disk. Also, all output is discarded (sent to /dev/null). + """ + elapsed = 0 + times = [] + with open(os.devnull, "w") as devnull: + for i in range(loops): + if legacy: + print(i) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long main() takes. 
+ t0 = pyperf.perf_counter() + try: + main(None, devnull, devnull, TARGETS) + except SystemExit: + pass + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_mypy, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of mypy types" + runner.bench_time_func("mypy", bench_mypy) diff --git a/data/pycparser_target/README b/benchmarks/bm_pycparser/data/pycparser_target/README similarity index 100% rename from data/pycparser_target/README rename to benchmarks/bm_pycparser/data/pycparser_target/README diff --git a/data/pycparser_target/redis.c.ppout b/benchmarks/bm_pycparser/data/pycparser_target/redis.c.ppout similarity index 100% rename from data/pycparser_target/redis.c.ppout rename to benchmarks/bm_pycparser/data/pycparser_target/redis.c.ppout diff --git a/data/pycparser_target/sqlite-btree.c.ppout b/benchmarks/bm_pycparser/data/pycparser_target/sqlite-btree.c.ppout similarity index 100% rename from data/pycparser_target/sqlite-btree.c.ppout rename to benchmarks/bm_pycparser/data/pycparser_target/sqlite-btree.c.ppout diff --git a/data/pycparser_target/tccgen.c.ppout b/benchmarks/bm_pycparser/data/pycparser_target/tccgen.c.ppout similarity index 100% rename from data/pycparser_target/tccgen.c.ppout rename to benchmarks/bm_pycparser/data/pycparser_target/tccgen.c.ppout diff --git a/benchmarks/bm_pycparser/legacyutils.py b/benchmarks/bm_pycparser/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_pycparser/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_pycparser/pyproject.toml b/benchmarks/bm_pycparser/pyproject.toml new file mode 100644 index 0000000..38020ba --- /dev/null +++ b/benchmarks/bm_pycparser/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_pycparser" +dependencies = [ + "pycparser", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/pycparser_bench_requirements.txt b/benchmarks/bm_pycparser/requirements.txt similarity index 100% rename from benchmarks/pycparser_bench_requirements.txt rename to benchmarks/bm_pycparser/requirements.txt diff --git a/benchmarks/bm_pycparser/run_benchmark.py b/benchmarks/bm_pycparser/run_benchmark.py new file mode 100644 index 0000000..eca4236 --- /dev/null +++ b/benchmarks/bm_pycparser/run_benchmark.py @@ -0,0 +1,79 @@ +import os +import os.path + +import pyperf +from pycparser import c_parser, c_ast + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +TARGET = os.path.join(DATADIR, "pycparser_target") + + +def _iter_files(rootdir=TARGET): + for name in os.listdir(rootdir): + if not name.endswith(".ppout"): + continue + filename = os.path.join(TARGET, name) + with open(filename) as f: + yield (filename, f.read()) + + +def parse_files(files): + for _, text in files: + # We use a new parser each time because CParser objects + # aren't designed for re-use. + parser = c_parser.CParser() + ast = parser.parse(text, '') + assert isinstance(ast, c_ast.FileAST) + + +############################# +# benchmarks + +def bench_pycparser(loops=20): + elapsed, _ = _bench_pycparser(loops) + return elapsed + + +def _bench_pycparser(loops=20): + """Measure running pycparser on several large C files N times. 
+ + The files are all relatively large, from well-known projects. + Each is already preprocessed. + + Only the CParser.parse() calls are measured. The following are not: + + * finding the target files + * reading them from disk + * creating the CParser object + """ + files = list(_iter_files()) + + elapsed = 0 + times = [] + for _ in range(loops): + times.append(pyperf.perf_counter()) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long parser.parse() takes. + t0 = pyperf.perf_counter() + parse_files(files) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_pycparser) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of pycparser" + runner.bench_time_func("pycparser", bench_pycparser) diff --git a/data/pylint_target/__init__.py b/benchmarks/bm_pylint/data/pylint_target/__init__.py similarity index 100% rename from data/pylint_target/__init__.py rename to benchmarks/bm_pylint/data/pylint_target/__init__.py diff --git a/data/pylint_target/dist.py b/benchmarks/bm_pylint/data/pylint_target/dist.py similarity index 100% rename from data/pylint_target/dist.py rename to benchmarks/bm_pylint/data/pylint_target/dist.py diff --git a/benchmarks/bm_pylint/legacyutils.py b/benchmarks/bm_pylint/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_pylint/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_pylint/pyproject.toml b/benchmarks/bm_pylint/pyproject.toml new file mode 100644 index 0000000..207eec9 --- /dev/null +++ b/benchmarks/bm_pylint/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_pylint" +dependencies = [ + "pylint", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/pylint_bench_requirements.txt b/benchmarks/bm_pylint/requirements.txt similarity index 100% rename from benchmarks/pylint_bench_requirements.txt rename to benchmarks/bm_pylint/requirements.txt diff --git a/benchmarks/bm_pylint/run_benchmark.py b/benchmarks/bm_pylint/run_benchmark.py new file mode 100644 index 0000000..40ad2c8 --- /dev/null +++ b/benchmarks/bm_pylint/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path + +import pyperf +#from pylint import epylint as lint +from pylint.lint import Run + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +TARGETS = [ + os.path.join(DATADIR, "pylint_target", "dist.py"), +] + + +def noop(*args, **kw): + pass + + +class NullReporter: + path_strip_prefix = "/" + def __getattr__(self, attr): + return noop + + +############################# +# benchmarks + +def bench_pylint(loops=10): + elapsed, _ = _bench_pylint(loops) + return elapsed + + +def _bench_pylint(loops=10): + """Measure running pylint on a file N times. + + The target file is a relatively large, complex one copied + from distutils in the stdlib. + + pylint seems to speed up considerably as it progresses, and this + benchmark includes that. + """ + elapsed = 0 + times = [] + for i in range(loops): + print(i) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long Run() takes. 
+ t0 = pyperf.perf_counter() + reporter = NullReporter() + Run(TARGETS, exit=False, reporter=reporter) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_pylint) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of pylint" + runner.bench_time_func("pylint", bench_pylint) diff --git a/benchmarks/bm_pytorch_alexnet_inference/legacyutils.py b/benchmarks/bm_pytorch_alexnet_inference/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_pytorch_alexnet_inference/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_pytorch_alexnet_inference/pyproject.toml b/benchmarks/bm_pytorch_alexnet_inference/pyproject.toml new file mode 100644 index 0000000..1225fd2 --- /dev/null +++ b/benchmarks/bm_pytorch_alexnet_inference/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "bm_pytorch_alexnet_inference" +dependencies = [ + "torch", + "Pillow", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/pytorch_alexnet_inference_requirements.txt b/benchmarks/bm_pytorch_alexnet_inference/requirements.txt similarity index 74% rename from benchmarks/pytorch_alexnet_inference_requirements.txt rename to benchmarks/bm_pytorch_alexnet_inference/requirements.txt index d3cba68..49338ed 100644 --- a/benchmarks/pytorch_alexnet_inference_requirements.txt +++ b/benchmarks/bm_pytorch_alexnet_inference/requirements.txt @@ -2,3 +2,4 @@ future==0.18.2 numpy==1.19.0 Pillow==8.0.0 torch==1.5.1 +torchvision==0.6.1 diff --git a/benchmarks/bm_pytorch_alexnet_inference/run_benchmark.py b/benchmarks/bm_pytorch_alexnet_inference/run_benchmark.py new file mode 100644 index 0000000..5c46955 --- /dev/null +++ b/benchmarks/bm_pytorch_alexnet_inference/run_benchmark.py @@ -0,0 +1,90 @@ +import os +import os.path +import sys +import urllib.request + +import pyperf +from PIL import Image +import torch +from torchvision import transforms + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +if not os.path.exists(DATADIR): + os.mkdir(DATADIR) + +# TODO: Vendor this file (and the pytorch hub model) into the data dir, +# to avoid network access and to pin the data for consistent results. +URL = "https://github.com/pytorch/hub/raw/master/images/dog.jpg" +FILENAME = os.path.join(DATADIR, "dog.jpg") + + +############################# +# benchmarks + +def bench_pytorch(loops=1000): + elapsed, _ = _bench_pytorch(loops) + return elapsed + + +def _bench_pytorch(loops=1000, *, legacy=False): + """Measure using pytorch to transform an image N times. + + This involves the following steps: + + * load a pre-trained model (alexnet) + * mark it for evaluation + * download an image + * prepare it to be run through the model + * turn off gradients computation + * run the image through the model + + Only that last step is measured (and repeated N times). 
+ """ + start = pyperf.perf_counter() + model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True) + # assert pyperf.perf_counter() - start < 3, "looks like we just did the first-time download, run this benchmark again to get a clean run" + model.eval() + + urllib.request.urlretrieve(URL, FILENAME) + input_image = Image.open(FILENAME) + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + + with torch.no_grad(): + elapsed = 0 + times = [] + for i in range(loops): + if legacy and (i % 10 == 0): + print(i) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long model() takes. + t0 = pyperf.perf_counter() + output = model(input_batch) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_pytorch, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of pytorch" + runner.bench_time_func("pytorch", bench_pytorch) diff --git a/data/Makefile b/benchmarks/bm_thrift/data/Makefile similarity index 100% rename from data/Makefile rename to benchmarks/bm_thrift/data/Makefile diff --git a/data/addressbook.thrift b/benchmarks/bm_thrift/data/addressbook.thrift similarity index 100% rename from data/addressbook.thrift rename to benchmarks/bm_thrift/data/addressbook.thrift diff --git a/data/thrift/__init__.py b/benchmarks/bm_thrift/data/thrift/__init__.py similarity index 100% rename from data/thrift/__init__.py rename to benchmarks/bm_thrift/data/thrift/__init__.py diff --git a/data/thrift/addressbook/__init__.py b/benchmarks/bm_thrift/data/thrift/addressbook/__init__.py similarity index 100% rename from data/thrift/addressbook/__init__.py rename to benchmarks/bm_thrift/data/thrift/addressbook/__init__.py diff --git a/data/thrift/addressbook/constants.py b/benchmarks/bm_thrift/data/thrift/addressbook/constants.py similarity index 100% rename from data/thrift/addressbook/constants.py rename to benchmarks/bm_thrift/data/thrift/addressbook/constants.py diff --git a/data/thrift/addressbook/ttypes.py b/benchmarks/bm_thrift/data/thrift/addressbook/ttypes.py similarity index 100% rename from data/thrift/addressbook/ttypes.py rename to benchmarks/bm_thrift/data/thrift/addressbook/ttypes.py diff --git a/benchmarks/bm_thrift/legacyutils.py b/benchmarks/bm_thrift/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_thrift/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_thrift/pyproject.toml b/benchmarks/bm_thrift/pyproject.toml new file mode 100644 index 0000000..8d12b9e --- /dev/null +++ b/benchmarks/bm_thrift/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_thrift" +dependencies = [ + "thrift", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." 
diff --git a/benchmarks/thrift_bench_requirements.txt b/benchmarks/bm_thrift/requirements.txt similarity index 100% rename from benchmarks/thrift_bench_requirements.txt rename to benchmarks/bm_thrift/requirements.txt diff --git a/benchmarks/bm_thrift/run_benchmark.py b/benchmarks/bm_thrift/run_benchmark.py new file mode 100644 index 0000000..9b45efd --- /dev/null +++ b/benchmarks/bm_thrift/run_benchmark.py @@ -0,0 +1,96 @@ +# Adapted from https://raw.githubusercontent.com/Thriftpy/thriftpy2/master/benchmark/benchmark_apache_thrift_struct.py + +import os.path +import sys + +import pyperf +from thrift.TSerialization import serialize, deserialize +from thrift.protocol.TBinaryProtocol import ( + TBinaryProtocolFactory, + TBinaryProtocolAcceleratedFactory +) + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +# The target files were generated using the make file in the data dir. +TARGET = os.path.join(DATADIR, "thrift") + + +if TARGET not in sys.path: + sys.path.insert(0, TARGET) +from addressbook import ttypes + + +def make_addressbook(): + phone1 = ttypes.PhoneNumber() + phone1.type = ttypes.PhoneType.MOBILE + phone1.number = '555-1212' + phone2 = ttypes.PhoneNumber() + phone2.type = ttypes.PhoneType.HOME + phone2.number = '555-1234' + person = ttypes.Person() + person.name = "Alice" + person.phones = [phone1, phone2] + person.created_at = 1400000000 + + ab = ttypes.AddressBook() + ab.people = {person.name: person} + return ab + + +############################# +# benchmarks + +def bench_thrift(loops=1000): + elapsed, _ = _bench_thrift(loops) + return elapsed + + +def _bench_thrift(loops=1000): + """Measure using a thrift-generated library N times. + + The target is a simple addressbook. We measure the following: + + * create an addressbook with 1 person in it + * serialize it + * deserialize it into a new addressbook + + For each iteration we repeat this 100 times. + """ + # proto_factory = TBinaryProtocolFactory() + proto_factory = TBinaryProtocolAcceleratedFactory() + + elapsed = 0 + times = [] + for _ in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long the Addressbook ops take. + t0 = pyperf.perf_counter() + for _ in range(100): + # First, create the addressbook. + ab = make_addressbook() + # Then, round-trip through serialization. + encoded = serialize(ab, proto_factory) + ab2 = ttypes.AddressBook() + deserialize(ab2, encoded, proto_factory) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_thrift) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of thrift" + runner.bench_time_func("thrift", bench_thrift) diff --git a/benchmarks/djangocms.py b/benchmarks/djangocms.py deleted file mode 100644 index af0d3ca..0000000 --- a/benchmarks/djangocms.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -Django-cms test -Sets up a djangocms installation, and hits '/' a number of times. -'/' is not super interesting, but it still exercises a little bit of -functionality; looking at cms/templates/cms/welcome.html, it seems -to do a decent amount of template logic, as well as do some basic -user auth. -We could probably improve the flow though, perhaps by logging in -and browsing around. 
-""" - -import os -import requests -import socket -import subprocess -import sys -import tempfile -import time -import json - -def setup(): - """ - Set up a djangocms installation. - Runs the initial bootstrapping without the db migration, - so that we can turn off sqlite synchronous and avoid fs time. - Rough testing shows that setting synchronous=OFF is basically - the same performance as running on /dev/shm - """ - - subprocess.check_call([exe.replace("python3", "djangocms"), "testsite", "--verbose", "--no-sync"]) - - with open("testsite/testsite/settings.py", "a") as f: - f.write(""" -from django.db.backends.signals import connection_created -def set_no_sychronous(sender, connection, **kwargs): - if connection.vendor == 'sqlite': - cursor = connection.cursor() - cursor.execute('PRAGMA synchronous = OFF;') - -connection_created.connect(set_no_sychronous) -""") - start = time.time() - subprocess.check_call([exe, "manage.py", "migrate"], cwd="testsite") - elapsed = time.time() - start - print("%.2fs to initialize db" % (elapsed,)) - -def waitUntilUp(addr, timeout=10.0): - start = time.time() - while True: - try: - with socket.create_connection(addr) as sock: - return - except ConnectionRefusedError: - if time.time() > start + timeout: - raise Exception("Timeout reached when trying to connect") - time.sleep(0.001) - -def runbenchmark(n=800, out_file=None): - p = subprocess.Popen([exe, "manage.py", "runserver", "--noreload"], cwd="testsite", stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - try: - waitUntilUp(("127.0.0.1", 8000)) - - start = time.time() - times = [] - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8000/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - exitcode = p.poll() - assert exitcode is None, exitcode - - if out_file: - json.dump(times, open(out_file, 'w')) - - finally: - p.terminate() - p.wait() - -if __name__ == "__main__": - exe = sys.executable - # Hack: make sure this file gets run as "python3" so that perf will collate across different processes - if not exe.endswith('3'): - os.execv(exe + '3', [exe + '3'] + sys.argv) - - os.environ["PATH"] = os.path.dirname(exe) + ":" + os.environ["PATH"] - - """ - Usage: - python djangocms.py - python djangocms.py --setup DIR - python djangocms.py --serve DIR - - The first form creates a temporary directory, sets up djangocms in it, - serves out of it, and removes the directory. - The second form sets up a djangocms installation in the given directory. - The third form runs a benchmark out of an already-set-up directory - The second and third forms are useful if you want to benchmark the - initial migration phase separately from the second serving phase. - """ - if "--setup" in sys.argv: - assert len(sys.argv) > 2 - dir = sys.argv[-1] - os.makedirs(dir, exist_ok=True) - os.chdir(dir) - setup() - elif "--serve" in sys.argv: - assert len(sys.argv) > 2 - os.chdir(sys.argv[-1]) - runbenchmark() - else: - n = 800 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - out_file = None - if len(sys.argv) > 2: - out_file = os.path.abspath(sys.argv[2]) - - # It might be interesting to put the temporary directory in /dev/shm, - # which makes the initial db migration about 20% faster. 
- with tempfile.TemporaryDirectory(prefix="djangocms_test_") as d: - os.chdir(d) - - setup() - runbenchmark(n, out_file) diff --git a/benchmarks/flaskblogging.py b/benchmarks/flaskblogging.py deleted file mode 100644 index 40fa891..0000000 --- a/benchmarks/flaskblogging.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time - -from djangocms import waitUntilUp - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - p = subprocess.Popen([exe, "../data/flaskblogging_serve.py"], stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT, cwd=os.path.dirname(__file__)) - try: - waitUntilUp(("127.0.0.1", 8000)) - - n = 1800 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8000/blog/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p.poll() is None, p.poll() - - finally: - p.terminate() - p.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/gevent_bench_hub.py b/benchmarks/gevent_bench_hub.py deleted file mode 100644 index 7e27737..0000000 --- a/benchmarks/gevent_bench_hub.py +++ /dev/null @@ -1,122 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Benchmarks for hub primitive operations. - -Taken from https://github.com/gevent/gevent/blob/master/benchmarks/bench_hub.py -Modified to remove perf and not need any command line arguments -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# import perf -# from perf import perf_counter - -import gevent -from greenlet import greenlet -from greenlet import getcurrent - - -N = 1000 - -def bench_switch(): - - class Parent(type(gevent.get_hub())): - def run(self): - parent = self.parent - for _ in range(N): - parent.switch() - - def child(): - parent = getcurrent().parent - # Back to the hub, which in turn goes - # back to the main greenlet - for _ in range(N): - parent.switch() - - hub = Parent(None, None) - child_greenlet = greenlet(child, hub) - for _ in range(N): - child_greenlet.switch() - -def bench_wait_ready(): - - class Watcher(object): - def start(self, cb, obj): - # Immediately switch back to the waiter, mark as ready - cb(obj) - - def stop(self): - pass - - watcher = Watcher() - hub = gevent.get_hub() - - for _ in range(1000): - hub.wait(watcher) - -def bench_cancel_wait(): - - class Watcher(object): - active = True - callback = object() - - def close(self): - pass - - watcher = Watcher() - hub = gevent.get_hub() - loop = hub.loop - - for _ in range(1000): - # Schedule all the callbacks. - hub.cancel_wait(watcher, None, True) - - # Run them! 
- for cb in loop._callbacks: - if cb.callback: - cb.callback(*cb.args) - cb.stop() # so the real loop won't do it - - # destroy the loop so we don't keep building these functions - # up - hub.destroy(True) - -def bench_wait_func_ready(): - from gevent.hub import wait - class ToWatch(object): - def rawlink(self, cb): - cb(self) - - watched_objects = [ToWatch() for _ in range(N)] - - t0 = perf_counter() - - wait(watched_objects) - - return perf_counter() - t0 - -def main(): - - runner = perf.Runner() - - runner.bench_func('multiple wait ready', - bench_wait_func_ready, - inner_loops=N) - - runner.bench_func('wait ready', - bench_wait_ready, - inner_loops=N) - - runner.bench_func('cancel wait', - bench_cancel_wait, - inner_loops=N) - - runner.bench_func('switch', - bench_switch, - inner_loops=N) - -if __name__ == '__main__': - # main() - for i in range(10000): - bench_switch() diff --git a/benchmarks/gunicorn.py b/benchmarks/gunicorn.py deleted file mode 100644 index aa39d5f..0000000 --- a/benchmarks/gunicorn.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time - -from djangocms import waitUntilUp - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - p = subprocess.Popen([os.path.join(os.path.dirname(exe), "gunicorn"), "gunicorn_serve:main", "--bind", "127.0.0.1:8000", "-w", "1", "--worker-class", "aiohttp.GunicornWebWorker"], stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT, cwd=os.path.join(os.path.dirname(__file__), "../data")) - try: - waitUntilUp(("127.0.0.1", 8000)) - - n = 3000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8000/blog/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p.poll() is None, p.poll() - - finally: - p.terminate() - p.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/json_bench.py b/benchmarks/json_bench.py deleted file mode 100644 index c762cd9..0000000 --- a/benchmarks/json_bench.py +++ /dev/null @@ -1,31 +0,0 @@ -import json -import os -import sys -import time - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - with open(os.path.join(os.path.dirname(__file__), "../data/reddit_comments.json")) as f: - s = f.read() - - data = s.split('\n') - - n = 400 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - times = [] - - for i in range(n): - times.append(time.time()) - for s in data: - if not s: - continue - json.loads(s) - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/kinto_bench.py b/benchmarks/kinto_bench.py deleted file mode 100644 index ced23d7..0000000 --- a/benchmarks/kinto_bench.py +++ /dev/null @@ -1,75 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time -import urllib - -from djangocms import waitUntilUp - -from os.path import join, abspath, dirname - -if __name__ == "__main__": - exe = sys.executable - def bin(name): - return join(dirname(exe), name) - def rel(path): - return abspath(join(dirname(__file__), path)) - - times = [] - - subprocess.check_call([abspath(exe), rel("../data/kinto_project/setup.py"), "develop"], cwd=rel("../data/kinto_project"), stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - - 
try: - os.remove("/tmp/kinto.sock") - except FileNotFoundError: - pass - p1 = subprocess.Popen([bin("uwsgi"), rel("../data/kinto_project/production.ini")], cwd=rel("../data/kinto_project"), stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - # p1 = subprocess.Popen([bin("uwsgi"), rel("../data/kinto_project/production.ini")], cwd=rel("../data/kinto_project")) - while not os.path.exists("/tmp/kinto.sock"): - time.sleep(0.001) - - # p2 = subprocess.Popen(["nginx", "-c", abspath("../data/kinto_project/nginx.conf"), "-p", abspath("../data/kinto_project")], cwd="../data/kinto_project", stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - p2 = subprocess.Popen(["nginx", "-c", rel("../data/kinto_project/nginx.conf"), "-p", rel("../data/kinto_project")], cwd=rel("../data/kinto_project")) - - time.sleep(0.010) - - try: - waitUntilUp(("127.0.0.1", 8000)) - - assert p1.poll() is None, p1.poll() - assert p2.poll() is None, p2.poll() - - print(requests.get("http://localhost:8000/v1").text) - # print(requests.put("http://localhost:8000/v1/accounts/testuser", json={"data": {"password": "password1"}}).text) - - n = 5000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - # requests.get("http://localhost:8000/v1/").text - urllib.request.urlopen("http://localhost:8000/v1/").read() - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p1.poll() is None, p1.poll() - assert p2.poll() is None, p2.poll() - - finally: - p1.terminate() - p1.kill() - p1.wait() - # p2.kill() - p2.terminate() - p2.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/mypy_bench.py b/benchmarks/mypy_bench.py deleted file mode 100644 index b704e8c..0000000 --- a/benchmarks/mypy_bench.py +++ /dev/null @@ -1,33 +0,0 @@ -import json -import os -import sys -import time - -""" -I tested it, and it looks like we get the same performance conclusions -when we run on the same file multiple times as if we run on a set of files once. - -So for convenience run on a single file multiple times. 
-""" - -if __name__ == "__main__": - from mypy.main import main - - n = 20 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - target = os.path.join(os.path.dirname(__file__), "../data/mypy_target.py") - - times = [] - devnull = open("/dev/null", "w") - for i in range(n): - times.append(time.time()) - print(i) - try: - main(None, devnull, devnull, [target]) - except SystemExit: - pass - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/pycparser_bench.py b/benchmarks/pycparser_bench.py deleted file mode 100644 index 9a01cf4..0000000 --- a/benchmarks/pycparser_bench.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import os -import sys -import time - -from pycparser import c_parser, c_ast - -def parse_files(files): - for code in files: - parser = c_parser.CParser() - ast = parser.parse(code, '') - assert isinstance(ast, c_ast.FileAST) - -if __name__ == "__main__": - n = 20 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - files = [] - directory = os.path.abspath(__file__ + "/../../data/pycparser_target") - for filename in os.listdir(directory): - filename = os.path.join(directory, filename) - if not filename.endswith(".ppout"): - continue - with open(filename) as f: - files.append(f.read()) - - times = [] - for i in range(n): - times.append(time.time()) - - parse_files(files) - - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/pylint_bench.py b/benchmarks/pylint_bench.py deleted file mode 100644 index 6736b88..0000000 --- a/benchmarks/pylint_bench.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import os -import subprocess -import sys -import time - -from pylint import epylint as lint -from pylint.lint import Run - -""" -pylint benchmark - -pylint seems to speed up considerably as it progresses, and this -benchmark includes that -""" - -if __name__ == "__main__": - def noop(*args, **kw): - pass - class NullReporter: - path_strip_prefix = "/" - def __getattr__(self, attr): - return noop - - n = 10 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - times = [] - for i in range(n): - times.append(time.time()) - print(i) - Run([os.path.join(os.path.dirname(__file__), "../data/pylint_target/dist.py")], exit=False, reporter=NullReporter()) - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/pytorch_alexnet_inference.py b/benchmarks/pytorch_alexnet_inference.py deleted file mode 100644 index 0ccf648..0000000 --- a/benchmarks/pytorch_alexnet_inference.py +++ /dev/null @@ -1,43 +0,0 @@ -import json -import time -import torch -import urllib -import sys - -if __name__ == "__main__": - start = time.time() - model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True) - # assert time.time() - start < 3, "looks like we just did the first-time download, run this benchmark again to get a clean run" - model.eval() - - url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg") - urllib.request.urlretrieve(url, filename) - - from PIL import Image - from torchvision import transforms - input_image = Image.open(filename) - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - input_tensor = preprocess(input_image) - input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model - - n = 1000 - if 
len(sys.argv) > 1: - n = int(sys.argv[1]) - - with torch.no_grad(): - times = [] - for i in range(n): - times.append(time.time()) - if i % 10 == 0: - print(i) - output = model(input_batch) - times.append(time.time()) - print((len(times) - 1) / (times[-1] - times[0]) , "/s") - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/thrift_bench.py b/benchmarks/thrift_bench.py deleted file mode 100644 index 68d5a69..0000000 --- a/benchmarks/thrift_bench.py +++ /dev/null @@ -1,58 +0,0 @@ -# Adapted from https://raw.githubusercontent.com/Thriftpy/thriftpy2/master/benchmark/benchmark_apache_thrift_struct.py - -import json -import time - -from thrift.TSerialization import serialize, deserialize -from thrift.protocol.TBinaryProtocol import ( - TBinaryProtocolFactory, - TBinaryProtocolAcceleratedFactory -) - -import os -import sys -sys.path.append(os.path.join(os.path.dirname(__file__), "../data/thrift")) -from addressbook import ttypes - - -def make_addressbook(): - phone1 = ttypes.PhoneNumber() - phone1.type = ttypes.PhoneType.MOBILE - phone1.number = '555-1212' - phone2 = ttypes.PhoneNumber() - phone2.type = ttypes.PhoneType.HOME - phone2.number = '555-1234' - person = ttypes.Person() - person.name = "Alice" - person.phones = [phone1, phone2] - person.created_at = 1400000000 - - ab = ttypes.AddressBook() - ab.people = {person.name: person} - return ab - - -def main(): - # proto_factory = TBinaryProtocolFactory() - proto_factory = TBinaryProtocolAcceleratedFactory() - - n = 1000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - times = [] - - for i in range(n): - times.append(time.time()) - for j in range(100): - ab = make_addressbook() - encoded = serialize(ab, proto_factory) - ab2 = ttypes.AddressBook() - deserialize(ab2, encoded, proto_factory) - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3901717 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "python-macrobenchmarks" +version = "0.9.0" # XXX an arbitrary value; the repo doesn't have one +description = "Pyston benchmarks" +#requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/pyston/python-macrobenchmarks"} + +[tool.pyperformance] +manifest = "benchmarks/MANIFEST" diff --git a/run_all.sh b/run_all.sh index 27df37b..c1eeafb 100755 --- a/run_all.sh +++ b/run_all.sh @@ -13,9 +13,22 @@ set -x mkdir -p results ENV=/tmp/macrobenchmark_env -for bench in flaskblogging djangocms mypy_bench pylint_bench pycparser_bench pytorch_alexnet_inference gunicorn aiohttp thrift_bench gevent_bench_hub kinto_bench; do +for bench in flaskblogging djangocms mypy pylint pycparser pytorch_alexnet_inference gunicorn aiohttp thrift gevent_hub kinto; do + case $bench in + gevent_hub) + outname=gevent_bench_hub + ;; + mypy|pylint|pycparser|thrift|kinto) + outname=${bench}_bench + ;; + *) + outname=$bench + ;; + esac + rm -rf $ENV $BINARY -m venv $ENV - $ENV/bin/pip install -r $(dirname $0)/benchmarks/${bench}_requirements.txt - /usr/bin/time --verbose --output=results/${bench}.out $ENV/bin/python $(dirname $0)/benchmarks/${bench}.py + $ENV/bin/pip install pyperf==2.2.0 + $ENV/bin/pip install -r $(dirname $0)/benchmarks/bm_${bench}/requirements.txt + /usr/bin/time --verbose --output=results/${outname}.out $ENV/bin/python $(dirname $0)/benchmarks/bm_${bench}/run_benchmark.py --legacy done diff --git 
a/run_mypy.sh b/run_mypy.sh index 363fc7c..d030b5f 100644 --- a/run_mypy.sh +++ b/run_mypy.sh @@ -24,13 +24,14 @@ rm -rf /tmp/mypy git clone --depth 1 --branch v0.790 https://github.com/python/mypy/ /tmp/mypy cd /tmp/mypy +$ENV/bin/pip install pyperf==2.2.0 $ENV/bin/pip install -r mypy-requirements.txt $ENV/bin/pip install --upgrade setuptools git submodule update --init mypy/typeshed $ENV/bin/python setup.py --use-mypyc install cd - -time $ENV/bin/python benchmarks/mypy_bench.py 50 -time $ENV/bin/python benchmarks/mypy_bench.py 50 -time $ENV/bin/python benchmarks/mypy_bench.py 50 +time $ENV/bin/python benchmarks/bm_mypy/run_benchmark.py --legacy 50 +time $ENV/bin/python benchmarks/bm_mypy/run_benchmark.py --legacy 50 +time $ENV/bin/python benchmarks/bm_mypy/run_benchmark.py --legacy 50
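As the updated run_all.sh and run_mypy.sh show, the converted scripts can still be driven in legacy mode with an explicit loop count. In that mode the `_bench_*` helpers above build a flat list of timestamps: one entry taken at the start of every iteration plus a final entry taken after the last iteration finishes. A minimal sketch of post-processing such a list into per-iteration durations (the file path is hypothetical; a legacy run only writes such a file when an output path is supplied):

```python
import json

# Hypothetical output file from a legacy run; purely illustrative.
with open("results/thrift_bench.json") as f:
    times = json.load(f)

# Consecutive timestamps bracket one iteration each: times[:-1] are the
# per-iteration start times and times[-1] is taken after the last iteration.
durations = [t1 - t0 for t0, t1 in zip(times, times[1:])]
print(f"{len(durations)} iterations, {sum(durations):.2f}s total")
```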