diff --git a/.gitignore b/.gitignore index 6cb24dd..8a120cd 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,4 @@ dmypy.json .pyre/ results +benchmarks/bm_pytorch_alexnet_inference/data/dog.jpg diff --git a/README.md b/README.md index b0b1524..a7b9ef0 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,21 @@ # python-macrobenchmarks A collection of macro benchmarks for the Python programming language + + +## usage + +```shell +# Run the default benchmarks: +python3 -m pyperformance run --manifest $PWD/benchmarks/MANIFEST +``` + +The benchmarks can still be run without pyperformance. This will produce + the old results format. + +```shell +# Run the benchmarks: +sh ./run_all.sh + +# Run the mypy benchmark using mypyc: +sh ./run_mypy.sh +``` diff --git a/benchmarks/.libs/legacyutils.py b/benchmarks/.libs/legacyutils.py new file mode 100644 index 0000000..c91e136 --- /dev/null +++ b/benchmarks/.libs/legacyutils.py @@ -0,0 +1,22 @@ +import json +import sys + + +def maybe_handle_legacy(bench_func, *args, loopsarg='loops', legacyarg=None): + if '--legacy' not in sys.argv: + return + argv = list(sys.argv[1:]) + argv.remove('--legacy') + + kwargs = {} + if legacyarg: + kwargs[legacyarg] = True + if argv: + assert loopsarg + kwargs[loopsarg] = int(argv[0]) + + _, times = bench_func(*args, **kwargs) + if len(argv) > 1: + json.dump(times, open(argv[1], 'w')) + + sys.exit(0) diff --git a/benchmarks/.libs/netutils.py b/benchmarks/.libs/netutils.py new file mode 100644 index 0000000..9bd9833 --- /dev/null +++ b/benchmarks/.libs/netutils.py @@ -0,0 +1,88 @@ +import contextlib +import ipaddress +import os.path +import socket +import subprocess +import time + + +@contextlib.contextmanager +def serving(argv, sitedir, addr, *, + pause=None, + kill=False, + quiet=True, + ): + if os.path.exists(addr): + sock = addr + addr = None + try: + os.remove(sock) + except FileNotFoundError: + pass + else: + sock = None + + p = subprocess.Popen( + argv, + cwd=sitedir, + stdout=subprocess.DEVNULL if quiet else None, + stderr=subprocess.STDOUT if quiet else None, + ) + try: + if pause: + time.sleep(pause) + if not sock: + try: + waitUntilUp(addr) + except NotImplementedError: + sock = addr + addr = None + if sock: + while not os.path.exists(sock): + time.sleep(0.001) + assert p.poll() is None, p.poll() + yield + assert p.poll() is None, p.poll() + finally: + p.terminate() + if kill: + p.kill() + p.wait() + + +def waitUntilUp(addr, timeout=10.0): + end = time.time() + timeout + addr = parse_socket_addr(addr) + started = False + current = time.time() + while not started or current <= end: + try: + with socket.create_connection(addr) as sock: + return + except ConnectionRefusedError: + time.sleep(0.001) + started = True + current = time.time() + raise Exception('Timeout reached when trying to connect') + + +def parse_socket_addr(addr, *, resolve=True): + if not isinstance(addr, str): + raise NotImplementedError(addr) + host, _, port = addr.partition(':') + + if not host: + raise NotImplementedError(addr) + try: + host = ipaddress.ip_address(host) + except ValueError: + raise NotImplementedError(addr) + host = str(host) + + if not port: + raise NotImplementedError(addr) + if not port.isdigit(): + raise NotImplementedError(addr) + port = int(port) + + return (host, port) diff --git a/benchmarks/MANIFEST b/benchmarks/MANIFEST new file mode 100644 index 0000000..7af33eb --- /dev/null +++ b/benchmarks/MANIFEST @@ -0,0 +1,19 @@ +[benchmarks] + +name metafile +aiohttp +djangocms +flaskblogging +gevent_hub +gunicorn +json +kinto 
+mypy +mypyc +pycparser +pylint +pytorch_alexnet_inference +thrift + +[group default] +-mypyc diff --git a/benchmarks/aiohttp.py b/benchmarks/aiohttp.py deleted file mode 100644 index 45f26ca..0000000 --- a/benchmarks/aiohttp.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time - -from djangocms import waitUntilUp - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - p = subprocess.Popen([exe, "gunicorn_serve.py"], stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT, cwd=os.path.join(os.path.dirname(__file__), "../data")) - try: - waitUntilUp(("127.0.0.1", 8080)) - - n = 3000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8080/blog/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p.poll() is None, p.poll() - - finally: - p.terminate() - p.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/base.toml b/benchmarks/base.toml new file mode 120000 index 0000000..1e11d78 --- /dev/null +++ b/benchmarks/base.toml @@ -0,0 +1 @@ +../pyproject.toml \ No newline at end of file diff --git a/data/gunicorn_serve.py b/benchmarks/bm_aiohttp/data/serve.py similarity index 100% rename from data/gunicorn_serve.py rename to benchmarks/bm_aiohttp/data/serve.py diff --git a/benchmarks/bm_aiohttp/legacyutils.py b/benchmarks/bm_aiohttp/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_aiohttp/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_aiohttp/netutils.py b/benchmarks/bm_aiohttp/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_aiohttp/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_aiohttp/pyproject.toml b/benchmarks/bm_aiohttp/pyproject.toml new file mode 100644 index 0000000..dbe5021 --- /dev/null +++ b/benchmarks/bm_aiohttp/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "bm_aiohttp" +dependencies = [ + "aiohttp", + "gunicorn", + "requests", + "uvloop", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/aiohttp_requirements.txt b/benchmarks/bm_aiohttp/requirements.txt similarity index 100% rename from benchmarks/aiohttp_requirements.txt rename to benchmarks/bm_aiohttp/requirements.txt diff --git a/benchmarks/bm_aiohttp/run_benchmark.py b/benchmarks/bm_aiohttp/run_benchmark.py new file mode 100644 index 0000000..bf3ecf5 --- /dev/null +++ b/benchmarks/bm_aiohttp/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path +import requests +import sys + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +ARGV = [sys.executable, "serve.py"] + + +############################# +# benchmarks + +def bench_aiohttp_requests(loops=3000): + elapsed, _ = _bench_aiohttp_requests(loops) + return elapsed + + +def _bench_aiohttp_requests(loops=3000, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. 
The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV, DATADIR, "127.0.0.1:8080"): + requests_get = requests.get + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. + t0 = pyperf.perf_counter() + requests_get("http://localhost:8080/blog/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_aiohttp_requests, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of aiohttp" + runner.bench_time_func("aiohttp", bench_aiohttp_requests) diff --git a/benchmarks/bm_djangocms/legacyutils.py b/benchmarks/bm_djangocms/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_djangocms/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_djangocms/netutils.py b/benchmarks/bm_djangocms/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_djangocms/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_djangocms/pyproject.toml b/benchmarks/bm_djangocms/pyproject.toml new file mode 100644 index 0000000..84a16e4 --- /dev/null +++ b/benchmarks/bm_djangocms/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "bm_djangocms" +dependencies = [ + "Django", + "django-cms", + "djangocms-bootstrap4", + "djangocms-file", + "djangocms-googlemap", + "djangocms-installer", + "djangocms-snippet", + "djangocms-style", + "djangocms-video", + "requests", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/djangocms_requirements.txt b/benchmarks/bm_djangocms/requirements.txt similarity index 100% rename from benchmarks/djangocms_requirements.txt rename to benchmarks/bm_djangocms/requirements.txt diff --git a/benchmarks/bm_djangocms/run_benchmark.py b/benchmarks/bm_djangocms/run_benchmark.py new file mode 100644 index 0000000..e02fa60 --- /dev/null +++ b/benchmarks/bm_djangocms/run_benchmark.py @@ -0,0 +1,250 @@ +""" +Django-cms test +Sets up a djangocms installation, and hits '/' a number of times. +'/' is not super interesting, but it still exercises a little bit of +functionality; looking at cms/templates/cms/welcome.html, it seems +to do a decent amount of template logic, as well as do some basic +user auth. +We could probably improve the flow though, perhaps by logging in +and browsing around. +""" + +import contextlib +import os +import os.path +import requests +import shutil +import subprocess +import sys +import tempfile + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +PID_FILE = os.path.join(DATADIR, "setup.pid") +# It might be interesting to put the temporary directory in /dev/shm, +# which makes the initial db migration about 20% faster. 
+TEMP_DIR = None +TEMP_PREFIX = "djangocms_bench_" + +INNER_LOOPS = 800 + +# site +SITE_NAME = "testsite" +SETTINGS = """ +from django.db.backends.signals import connection_created +def set_no_sychronous(sender, connection, **kwargs): + if connection.vendor == 'sqlite': + cursor = connection.cursor() + cursor.execute('PRAGMA synchronous = OFF;') + +connection_created.connect(set_no_sychronous) +""" + +# django commands +DJANGOCMS = os.path.join( + os.path.dirname(sys.executable), + "djangocms", +) +ARGV_CREATE = [DJANGOCMS, SITE_NAME, "--verbose", "--no-sync"] +ARGV_MIGRATE = [sys.executable, "manage.py", "migrate"] +ARGV_SERVE = [sys.executable, "manage.py", "runserver", "--noreload"] + + +def setup(rootdir): + """ + Set up a djangocms installation. + Runs the initial bootstrapping without the db migration, + so that we can turn off sqlite synchronous and avoid fs time. + Rough testing shows that setting synchronous=OFF is basically + the same performance as running on /dev/shm. + """ + sitedir = os.path.join(rootdir, SITE_NAME) # This is where Django puts it. + + # Delete the site dir if it already exists. + if os.path.exists(sitedir): + shutil.rmtree(datadir, ignore_errors=False) + + # First, create the site. + subprocess.check_call(ARGV_CREATE, cwd=rootdir) + + # Add customizations. + settingsfile = os.path.join(sitedir, SITE_NAME, "settings.py") + with open(settingsfile, "a") as f: + f.write(SETTINGS) + + # Finalize the site. + t0 = pyperf.perf_counter() + subprocess.check_call(ARGV_MIGRATE, cwd=sitedir) + elapsed = pyperf.perf_counter() - t0 + + return sitedir, elapsed + + +# This is a generic util that might make sense to put in a separate lib. +def _ensure_python_on_PATH(python=sys.executable): + PATH = os.environ["PATH"].split(os.pathsep) + PATH.insert(0, os.path.dirname(python)) + os.environ["PATH"] = os.pathsep.join(PATH) + + +@contextlib.contextmanager +def _ensure_datadir(datadir, preserve=True): + if datadir: + try: + os.makedirs(datadir) + except FileExistsError: + if preserve is None: + preserve = True + elif not preserve: + raise NotImplementedError(datadir) + else: + datadir = tempfile.mkdtemp(prefix=TEMP_PREFIX, dir=TEMP_DIR) + + try: + yield datadir + finally: + if not preserve: + shutil.rmtree(datadir, ignore_errors=True) + + +############################# +# benchmarks + +def bench_djangocms_requests(sitedir, loops=INNER_LOOPS): + elapsed, _ = _bench_djangocms_requests(loops) + return elapsed + + +def _bench_djangocms_requests(sitedir, loops=INNER_LOOPS, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV_SERVE, sitedir, "127.0.0.1:8000"): + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. 
+ t0 = pyperf.perf_counter() + requests.get("http://localhost:8000/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +# We can't set "add_cmdline_args" on pyperf.Runner +# once we've created one. We work around this with a subclass. + +class _Runner(pyperf.Runner): + datadir = None + + def __init__(self): + def add_worker_args(cmd, _): + assert self.datadir + cmd.extend([ + '--serve', self.datadir, + ]) + super().__init__( + add_cmdline_args=add_worker_args, + ) + + +############################# +# the script + +if __name__ == "__main__": + """ + Usage: + python benchmarks/bm_djangocms/run_benchmark.py + python benchmarks/bm_djangocms/run_benchmark.py --setup DIR + python benchmarks/bm_djangocms/run_benchmark.py --serve DIR + + The first form creates a temporary directory, sets up djangocms in it, + serves out of it, and removes the directory. + The second form sets up a djangocms installation in the given directory. + The third form runs the benchmark out of an already-set-up directory + The second and third forms are useful if you want to benchmark the + initial migration phase separately from the second serving phase. + """ + runner = _Runner() + runner.metadata['description'] = "Test the performance of a Django data migration" + + # Parse the CLI args. + runner.argparser.add_argument("--legacy", action='store_true') + group = runner.argparser.add_mutually_exclusive_group() + group.add_argument("--setup") + group.add_argument("--serve") + args = runner.argparser.parse_args() + + if args.setup is not None: + args.datadir = args.setup + args.setup = True + args.serve = False + elif args.serve is not None: + args.datadir = args.serve + args.setup = False + args.serve = True + if not os.path.exists(args.datadir): + cmd = f"{sys.executable} {sys.argv[0]} --setup {args.datadir}?" + sys.exit(f"ERROR: Did you forget to run {cmd}?") + else: + args.datadir = None + args.setup = True + args.serve = True + + # DjangoCMS looks for Python on $PATH? + _ensure_python_on_PATH() + + # Get everything ready and then perform the requested operations. + preserve = True if args.setup and not args.serve else None + with _ensure_datadir(args.datadir, preserve) as datadir: + # First, set up the site. + if args.setup: + sitedir, elapsed = setup(datadir) + if args.legacy: + print("%.2fs to initialize db" % (elapsed,)) + print(f"site created in {sitedir}") + if not args.serve: + print(f"now run {sys.executable} {sys.argv[0]} --serve {datadir}") + else: + # This is what a previous call to setup() would have returned. + sitedir = os.path.join(datadir, SITE_NAME) + + # Then run the benchmark. 
+ if args.serve: + if args.legacy: + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_djangocms_requests, sitedir, legacyarg='legacy') + sys.exit(0) + + runner.datadir = datadir + + def time_func(loops, *args): + return bench_djangocms_requests(*args, loops=loops) + runner.bench_time_func("djangocms", time_func, sitedir, + inner_loops=INNER_LOOPS) diff --git a/data/flaskblogging_serve.py b/benchmarks/bm_flaskblogging/data/serve.py similarity index 100% rename from data/flaskblogging_serve.py rename to benchmarks/bm_flaskblogging/data/serve.py diff --git a/benchmarks/bm_flaskblogging/legacyutils.py b/benchmarks/bm_flaskblogging/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_flaskblogging/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_flaskblogging/netutils.py b/benchmarks/bm_flaskblogging/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_flaskblogging/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_flaskblogging/pyproject.toml b/benchmarks/bm_flaskblogging/pyproject.toml new file mode 100644 index 0000000..f07e1e4 --- /dev/null +++ b/benchmarks/bm_flaskblogging/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "bm_flaskblogging" +dependencies = [ + "Flask", + "Flask-Blogging", + "requests", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/flaskblogging_requirements.txt b/benchmarks/bm_flaskblogging/requirements.txt similarity index 100% rename from benchmarks/flaskblogging_requirements.txt rename to benchmarks/bm_flaskblogging/requirements.txt diff --git a/benchmarks/bm_flaskblogging/run_benchmark.py b/benchmarks/bm_flaskblogging/run_benchmark.py new file mode 100644 index 0000000..45089e0 --- /dev/null +++ b/benchmarks/bm_flaskblogging/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path +import requests +import sys + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +ARGV = [sys.executable, "serve.py"] + + +############################# +# benchmarks + +def bench_flask_requests(loops=1800): + elapsed, _ = _bench_flask_requests(loops) + return elapsed + + +def _bench_flask_requests(loops=1800, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV, DATADIR, "127.0.0.1:8000"): + requests_get = requests.get + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. 
+ t0 = pyperf.perf_counter() + requests_get("http://localhost:8000/blog/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_flask_requests, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of flask" + runner.bench_time_func("flaskblogging", bench_flask_requests) diff --git a/benchmarks/bm_gevent_hub/bm_gevent_cancel_wait.toml b/benchmarks/bm_gevent_hub/bm_gevent_cancel_wait.toml new file mode 100644 index 0000000..40bbf1e --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_cancel_wait.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_cancel_wait" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_cancel_wait"] diff --git a/benchmarks/bm_gevent_hub/bm_gevent_switch.toml b/benchmarks/bm_gevent_hub/bm_gevent_switch.toml new file mode 100644 index 0000000..3f96f66 --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_switch.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_switch" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_switch"] diff --git a/benchmarks/bm_gevent_hub/bm_gevent_wait_func_ready.toml b/benchmarks/bm_gevent_hub/bm_gevent_wait_func_ready.toml new file mode 100644 index 0000000..11bc9c6 --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_wait_func_ready.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_wait_func_ready" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_wait_func_ready"] diff --git a/benchmarks/bm_gevent_hub/bm_gevent_wait_ready.toml b/benchmarks/bm_gevent_hub/bm_gevent_wait_ready.toml new file mode 100644 index 0000000..6e673c7 --- /dev/null +++ b/benchmarks/bm_gevent_hub/bm_gevent_wait_ready.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_wait_ready" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["gevent_wait_ready"] diff --git a/benchmarks/bm_gevent_hub/pyproject.toml b/benchmarks/bm_gevent_hub/pyproject.toml new file mode 100644 index 0000000..b2eb678 --- /dev/null +++ b/benchmarks/bm_gevent_hub/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "bm_gevent_hub" +dependencies = ["gevent"] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/gevent_bench_hub_requirements.txt b/benchmarks/bm_gevent_hub/requirements.txt similarity index 100% rename from benchmarks/gevent_bench_hub_requirements.txt rename to benchmarks/bm_gevent_hub/requirements.txt diff --git a/benchmarks/bm_gevent_hub/run_benchmark.py b/benchmarks/bm_gevent_hub/run_benchmark.py new file mode 100644 index 0000000..4479492 --- /dev/null +++ b/benchmarks/bm_gevent_hub/run_benchmark.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +""" +Benchmarks for hub primitive operations. 
+ +Taken from https://github.com/gevent/gevent/blob/master/benchmarks/bench_hub.py +Modified to remove perf and not need any command line arguments +""" +import contextlib + +import pyperf +import gevent +import gevent.hub +from greenlet import greenlet +from greenlet import getcurrent + + +@contextlib.contextmanager +def active_hub(hub=None): + if hub is None: + hub = gevent.get_hub() + try: + yield hub + finally: + # Destroy the loop so we don't keep building up state (e.g. callbacks). + hub.destroy(True) + + +class SwitchingParent(gevent.hub.Hub): + """A gevent hub greenlet that switches back and forth with its child.""" + + def __init__(self, nswitches): + super().__init__(None, None) + self.nswitches = nswitches + self.child = greenlet(self._run_child, self) + + def _run_child(self): + # Back to the hub, which in turn goes + # back to the main greenlet + switch = getcurrent().parent.switch + for _ in range(self.nswitches): + switch() + + def run(self): + # Return to the main greenlet. + switch = self.parent.switch + for _ in range(self.nswitches): + switch() + + +class NoopWatcher: + def start(self, cb, obj): + # Immediately switch back to the waiter, mark as ready + cb(obj) + + def stop(self): + pass + + +class ActiveWatcher: + active = True + callback = object() + + def close(self): + pass + + +class NoopWatchTarget(object): + def rawlink(self, cb): + cb(self) + + +############################# +# benchmarks + +def bench_switch(loops=1000): + """Measure switching between a greenlet and the gevent hub N^2 times.""" + hub = SwitchingParent(loops) + child = hub.child + + with active_hub(hub): + elapsed = 0 + child_switch = child.switch + for _ in range(loops): + t0 = pyperf.perf_counter() + child_switch() + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + return elapsed + + +def bench_wait_ready(loops=1000): + """Measure waiting for a "noop" watcher to become ready N times.""" + watcher = NoopWatcher() + + with active_hub() as hub: + elapsed = 0 + hub_wait = hub.wait + for _ in range(loops): + t0 = pyperf.perf_counter() + hub_wait(watcher) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + return elapsed + + +def bench_cancel_wait(loops=1000): + """Measure canceling N watchers. + + Note that it is the same watcher N times and that it is a fake + that pretends to already be started. + """ + watcher = ActiveWatcher() + + with active_hub() as hub: + t0 = pyperf.perf_counter() + + # Cancel the fake wait requests. + for _ in range(loops): + # Schedule all the callbacks. + hub.cancel_wait(watcher, None, True) + + # Wait for all the watchers to be closed. + # TODO Start timing here? 
+ for cb in hub.loop._callbacks: + if cb.callback: + cb.callback(*cb.args) + cb.stop() # so the real loop won't do it + + return pyperf.perf_counter() - t0 + + +def bench_wait_func_ready(loops=1000): + """Measure waiting for N noop watch targets to become ready.""" + watched_objects = [NoopWatchTarget() for _ in range(loops)] + + t0 = pyperf.perf_counter() + gevent.hub.wait(watched_objects) + return pyperf.perf_counter() - t0 + + +BENCHMARKS = { + "gevent_hub": bench_switch, + "gevent_wait_func_ready": bench_wait_func_ready, + "gevent_wait_ready": bench_wait_ready, + "gevent_cancel_wait": bench_cancel_wait, + "gevent_switch": bench_switch, +} + + +############################# +# the script + +if __name__ == "__main__": + import sys + if '--legacy' in sys.argv: + for i in range(10000): + bench_switch() + sys.exit(0) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of gevent" + runner.argparser.add_argument("--legacy", action='store_true') + runner.argparser.add_argument("benchmark", nargs="?", + choices=sorted(BENCHMARKS), + default="gevent_hub") + + args = runner.parse_args() + name = args.benchmark + bench = BENCHMARKS[name] + assert(bench.__code__.co_varnames[0] == 'loops') + inner_loops = bench.__defaults__[0] + + runner.bench_time_func(name, bench, inner_loops=inner_loops) diff --git a/benchmarks/bm_gunicorn/data/serve_aiohttp.py b/benchmarks/bm_gunicorn/data/serve_aiohttp.py new file mode 100644 index 0000000..f87888a --- /dev/null +++ b/benchmarks/bm_gunicorn/data/serve_aiohttp.py @@ -0,0 +1,12 @@ +from aiohttp import web + +async def hello(request): + return web.Response(text="Hello, world") + +async def main(): + app = web.Application() + app.add_routes([web.get('/', hello)]) + return app + +if __name__ == "__main__": + web.run_app(main()) diff --git a/benchmarks/bm_gunicorn/legacyutils.py b/benchmarks/bm_gunicorn/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_gunicorn/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_gunicorn/netutils.py b/benchmarks/bm_gunicorn/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_gunicorn/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git a/benchmarks/bm_gunicorn/pyproject.toml b/benchmarks/bm_gunicorn/pyproject.toml new file mode 100644 index 0000000..0f3d550 --- /dev/null +++ b/benchmarks/bm_gunicorn/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = "bm_gunicorn" +dependencies = [ + "gunicorn", + "requests", + "uvloop", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." 
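
The bm_gunicorn benchmark that follows drives its server through the shared `netutils.serving()` helper (benchmarks/.libs/netutils.py, symlinked into each bm_* directory), just like the aiohttp, djangocms and flaskblogging scripts above. A minimal sketch of that contract, using a placeholder server command and data directory rather than any real benchmark's values:

```python
import sys

import requests

import netutils  # symlinked from benchmarks/.libs/netutils.py into each bm_* dir

# Placeholder command and data directory, for illustration only.
ARGV = [sys.executable, "serve.py"]
DATADIR = "/path/to/bm_example/data"

# serving() launches the server as a subprocess in DATADIR, waits until the
# "host:port" address accepts connections (or, for a unix-socket path, until
# the socket file shows up), yields while requests are made, and terminates
# the server afterwards. kill=True and pause=... are available for servers
# that need them (the kinto benchmark later in this diff uses both).
with netutils.serving(ARGV, DATADIR, "127.0.0.1:8000"):
    requests.get("http://127.0.0.1:8000/").text  # .text forces the body to be read
```
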
diff --git a/benchmarks/gunicorn_requirements.txt b/benchmarks/bm_gunicorn/requirements.txt similarity index 100% rename from benchmarks/gunicorn_requirements.txt rename to benchmarks/bm_gunicorn/requirements.txt diff --git a/benchmarks/bm_gunicorn/run_benchmark.py b/benchmarks/bm_gunicorn/run_benchmark.py new file mode 100644 index 0000000..41934f7 --- /dev/null +++ b/benchmarks/bm_gunicorn/run_benchmark.py @@ -0,0 +1,80 @@ +import os.path +import requests +import sys + +import pyperf +import netutils + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +GUNICORN = os.path.join( + os.path.dirname(sys.executable), + "gunicorn", +) +ADDR = "127.0.0.1:8000" +ARGV = [ + GUNICORN, "serve_aiohttp:main", + "--bind", ADDR, + "-w", "1", + "--worker-class", "aiohttp.GunicornWebWorker", +] + + +############################# +# benchmarks + +def bench_gunicorn(loops=3000): + elapsed, _ = _bench_gunicorn(loops) + return elapsed + + +def _bench_gunicorn(loops=3000, legacy=False): + """Measure N HTTP requests to a local server. + + Note that the server is freshly started here. + + Only the time for requests is measured here. The following are not: + + * preparing the site the server will serve + * starting the server + * stopping the server + + Hence this should be used with bench_time_func() + insted of bench_func(). + """ + start = pyperf.perf_counter() + elapsed = 0 + times = [] + with netutils.serving(ARGV, DATADIR, ADDR): + requests_get = requests.get + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. + t0 = pyperf.perf_counter() + requests_get("http://localhost:8000/blog/").text + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_gunicorn, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of gunicorn" + runner.bench_time_func("gunicorn", bench_gunicorn) diff --git a/data/reddit_comments.json b/benchmarks/bm_json/data/reddit_comments.json similarity index 100% rename from data/reddit_comments.json rename to benchmarks/bm_json/data/reddit_comments.json diff --git a/benchmarks/bm_json/legacyutils.py b/benchmarks/bm_json/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_json/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_json/pyproject.toml b/benchmarks/bm_json/pyproject.toml new file mode 100644 index 0000000..f8bf3fc --- /dev/null +++ b/benchmarks/bm_json/pyproject.toml @@ -0,0 +1,6 @@ +[project] +name = "bm_json" +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." 
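
The bm_json script that follows hands its measured loop to pyperf through `Runner.bench_time_func()`, the same entry point every benchmark on this branch uses. The docstrings above spell out why: only the request/parse loop should be timed, so the callable reports its own elapsed time instead of letting pyperf time the whole call. A minimal sketch of that contract, where `do_one_iteration()` is a stand-in rather than anything in this repo:

```python
import pyperf


def do_one_iteration():
    """Stand-in for the real unit of work (an HTTP request, json.loads(), ...)."""


def time_func(loops):
    # pyperf passes the outer-loop count; only the work itself is timed,
    # and the total elapsed time is returned to pyperf.
    elapsed = 0.0
    for _ in range(loops):
        t0 = pyperf.perf_counter()
        do_one_iteration()
        elapsed += pyperf.perf_counter() - t0
    return elapsed


if __name__ == "__main__":
    runner = pyperf.Runner()
    # bench_time_func() trusts the returned total, so setup and teardown stay
    # out of the measurement; bench_func() would time the whole call instead.
    runner.bench_time_func("example", time_func)
```
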
diff --git a/benchmarks/json_bench_requirements.txt b/benchmarks/bm_json/requirements.txt similarity index 100% rename from benchmarks/json_bench_requirements.txt rename to benchmarks/bm_json/requirements.txt diff --git a/benchmarks/bm_json/run_benchmark.py b/benchmarks/bm_json/run_benchmark.py new file mode 100644 index 0000000..201a0a1 --- /dev/null +++ b/benchmarks/bm_json/run_benchmark.py @@ -0,0 +1,68 @@ +import json +import os.path + +import pyperf + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +TARGET = os.path.join(DATADIR, "reddit_comments.json") + + +############################# +# benchmarks + +def bench_json_loads(loops=400): + elapsed, _ = _bench_json_loads(loops) + return elapsed + + +def _bench_json_loads(loops=400): + """Measure running json.loads() N times. + + The target data is nearly 1100 JSON objects, each on a single line, + from a file. The objects: + + * are all flat (no compound values) + * vary a little in number of properties, though none are big + * have a mix of values, both of type and size + + Only the json.loads() calls are measured. The following are not: + + * reading the text from the file + * looping through the lines + """ + with open(TARGET) as f: + s = f.read() + lines = s.splitlines() + + elapsed = 0 + times = [] + for _ in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long json.loads() takes. + t0 = pyperf.perf_counter() + for text in lines: + if not text: + continue + json.loads(text) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_json_loads) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of json" + runner.bench_time_func("json", bench_json_loads) diff --git a/data/kinto_project/.coveragerc b/benchmarks/bm_kinto/data/.coveragerc similarity index 100% rename from data/kinto_project/.coveragerc rename to benchmarks/bm_kinto/data/.coveragerc diff --git a/data/kinto_project/CHANGES.txt b/benchmarks/bm_kinto/data/CHANGES.txt similarity index 100% rename from data/kinto_project/CHANGES.txt rename to benchmarks/bm_kinto/data/CHANGES.txt diff --git a/data/kinto_project/MANIFEST.in b/benchmarks/bm_kinto/data/MANIFEST.in similarity index 100% rename from data/kinto_project/MANIFEST.in rename to benchmarks/bm_kinto/data/MANIFEST.in diff --git a/data/kinto_project/README.txt b/benchmarks/bm_kinto/data/README.txt similarity index 100% rename from data/kinto_project/README.txt rename to benchmarks/bm_kinto/data/README.txt diff --git a/data/kinto_project/app.wsgi b/benchmarks/bm_kinto/data/app.wsgi similarity index 100% rename from data/kinto_project/app.wsgi rename to benchmarks/bm_kinto/data/app.wsgi diff --git a/data/kinto_project/config/kinto.ini b/benchmarks/bm_kinto/data/config/kinto.ini similarity index 100% rename from data/kinto_project/config/kinto.ini rename to benchmarks/bm_kinto/data/config/kinto.ini diff --git a/data/kinto_project/development.ini b/benchmarks/bm_kinto/data/development.ini similarity index 100% rename from data/kinto_project/development.ini rename to benchmarks/bm_kinto/data/development.ini diff --git a/data/kinto_project/kinto_project/__init__.py b/benchmarks/bm_kinto/data/kinto_project/__init__.py similarity index 100% rename from 
data/kinto_project/kinto_project/__init__.py rename to benchmarks/bm_kinto/data/kinto_project/__init__.py diff --git a/data/kinto_project/kinto_project/static/pyramid-16x16.png b/benchmarks/bm_kinto/data/kinto_project/static/pyramid-16x16.png similarity index 100% rename from data/kinto_project/kinto_project/static/pyramid-16x16.png rename to benchmarks/bm_kinto/data/kinto_project/static/pyramid-16x16.png diff --git a/data/kinto_project/kinto_project/static/pyramid.png b/benchmarks/bm_kinto/data/kinto_project/static/pyramid.png similarity index 100% rename from data/kinto_project/kinto_project/static/pyramid.png rename to benchmarks/bm_kinto/data/kinto_project/static/pyramid.png diff --git a/data/kinto_project/kinto_project/static/theme.css b/benchmarks/bm_kinto/data/kinto_project/static/theme.css similarity index 100% rename from data/kinto_project/kinto_project/static/theme.css rename to benchmarks/bm_kinto/data/kinto_project/static/theme.css diff --git a/data/kinto_project/kinto_project/templates/layout.jinja2 b/benchmarks/bm_kinto/data/kinto_project/templates/layout.jinja2 similarity index 100% rename from data/kinto_project/kinto_project/templates/layout.jinja2 rename to benchmarks/bm_kinto/data/kinto_project/templates/layout.jinja2 diff --git a/data/kinto_project/kinto_project/templates/mytemplate.jinja2 b/benchmarks/bm_kinto/data/kinto_project/templates/mytemplate.jinja2 similarity index 100% rename from data/kinto_project/kinto_project/templates/mytemplate.jinja2 rename to benchmarks/bm_kinto/data/kinto_project/templates/mytemplate.jinja2 diff --git a/data/kinto_project/kinto_project/tests.py b/benchmarks/bm_kinto/data/kinto_project/tests.py similarity index 100% rename from data/kinto_project/kinto_project/tests.py rename to benchmarks/bm_kinto/data/kinto_project/tests.py diff --git a/data/kinto_project/kinto_project/views.py b/benchmarks/bm_kinto/data/kinto_project/views.py similarity index 100% rename from data/kinto_project/kinto_project/views.py rename to benchmarks/bm_kinto/data/kinto_project/views.py diff --git a/data/kinto_project/nginx.conf b/benchmarks/bm_kinto/data/nginx.conf similarity index 100% rename from data/kinto_project/nginx.conf rename to benchmarks/bm_kinto/data/nginx.conf diff --git a/data/kinto_project/production.ini b/benchmarks/bm_kinto/data/production.ini similarity index 100% rename from data/kinto_project/production.ini rename to benchmarks/bm_kinto/data/production.ini diff --git a/data/kinto_project/pytest.ini b/benchmarks/bm_kinto/data/pytest.ini similarity index 100% rename from data/kinto_project/pytest.ini rename to benchmarks/bm_kinto/data/pytest.ini diff --git a/data/kinto_project/setup.py b/benchmarks/bm_kinto/data/setup.py similarity index 100% rename from data/kinto_project/setup.py rename to benchmarks/bm_kinto/data/setup.py diff --git a/data/kinto_project/uwsgi_params b/benchmarks/bm_kinto/data/uwsgi_params similarity index 100% rename from data/kinto_project/uwsgi_params rename to benchmarks/bm_kinto/data/uwsgi_params diff --git a/benchmarks/bm_kinto/legacyutils.py b/benchmarks/bm_kinto/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_kinto/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_kinto/netutils.py b/benchmarks/bm_kinto/netutils.py new file mode 120000 index 0000000..3afa43f --- /dev/null +++ b/benchmarks/bm_kinto/netutils.py @@ -0,0 +1 @@ +../.libs/netutils.py \ No newline at end of file diff --git 
a/benchmarks/bm_kinto/pyproject.toml b/benchmarks/bm_kinto/pyproject.toml new file mode 100644 index 0000000..72cdb08 --- /dev/null +++ b/benchmarks/bm_kinto/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "bm_kinto" +dependencies = [ + "kinto", + "uWSGI", + "pyramind", + #"pyramid_jinja2", + #"pyramid_debugtoolbar", + "waitress", + "requests", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/kinto_bench_requirements.txt b/benchmarks/bm_kinto/requirements.txt similarity index 100% rename from benchmarks/kinto_bench_requirements.txt rename to benchmarks/bm_kinto/requirements.txt diff --git a/benchmarks/bm_kinto/run_benchmark.py b/benchmarks/bm_kinto/run_benchmark.py new file mode 100644 index 0000000..97122d3 --- /dev/null +++ b/benchmarks/bm_kinto/run_benchmark.py @@ -0,0 +1,90 @@ +import os +import os.path +import requests +import shutil +import subprocess +import sys +import urllib + +import pyperf +import netutils + + +PYTHON = os.path.abspath(sys.executable) +UWSGI = os.path.join(os.path.dirname(PYTHON), "uwsgi") +NGINX = shutil.which("nginx") + +SOCK = "/tmp/kinto.sock" +ADDR = "127.0.0.1:8000" + +DATADIR = os.path.join( + os.path.abspath(os.path.dirname(__file__)), + "data", +) +SETUP_PY = os.path.join(DATADIR, "setup.py") +PRODUCTION_INI = os.path.join(DATADIR, "production.ini") +NGINX_CONF = os.path.join(DATADIR, "nginx.conf") + + +############################# +# benchmarks + +def bench_kinto(loops=5000): + elapsed, _ = _bench_kinto(loops) + return elapsed + + +def _bench_kinto(loops=5000, legacy=False): + cmd = [PYTHON, SETUP_PY, "develop"] + proc = subprocess.run( + cmd, + cwd=DATADIR, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + ) + if proc.returncode != 0: + print(f'# running: {" ".join(cmd)} (in {DATADIR})') + subprocess.run(cmd, cwd=DATADIR, check=True) + + cmd_app = [UWSGI, PRODUCTION_INI] + with netutils.serving(cmd_app, DATADIR, SOCK, kill=True): + cmd_web = [NGINX, "-c", NGINX_CONF, "-p", DATADIR] + with netutils.serving(cmd_web, DATADIR, ADDR, pause=0.010, quiet=False): + if legacy: + print(requests.get("http://localhost:8000/v1").text) + # print(requests.put("http://localhost:8000/v1/accounts/testuser", json={"data": {"password": "password1"}}).text) + + start = pyperf.perf_counter() + elapsed = 0 + times = [] + for i in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long a request takes. 
+ t0 = pyperf.perf_counter() + # requests.get("http://localhost:8000/v1/").text + urllib.request.urlopen("http://localhost:8000/v1/").read() + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + if legacy and (i % 100 == 0): + print(i, t0 - start) + times.append(pyperf.perf_counter()) + if legacy: + total = times[-1] - start + print("%.2fs (%.3freq/s)" % (total, loops / total)) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_kinto, legacyarg='legacy') + + if NGINX is None: + raise Exception("nginx is not installed") + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of kinto" + runner.bench_time_func("kinto", bench_kinto) diff --git a/benchmarks/bm_mypy/bm_mypyc.toml b/benchmarks/bm_mypy/bm_mypyc.toml new file mode 100644 index 0000000..ca894a6 --- /dev/null +++ b/benchmarks/bm_mypy/bm_mypyc.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_mypyc" +dependencies = [ + "mypy", +] +dynamic = ["version"] + +[tool.pyperformance] +extra_opts = ["--loops", "50"] diff --git a/data/mypy_target.py b/benchmarks/bm_mypy/data/mypy_target.py similarity index 100% rename from data/mypy_target.py rename to benchmarks/bm_mypy/data/mypy_target.py diff --git a/benchmarks/bm_mypy/legacyutils.py b/benchmarks/bm_mypy/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_mypy/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_mypy/pyproject.toml b/benchmarks/bm_mypy/pyproject.toml new file mode 100644 index 0000000..5da0cd8 --- /dev/null +++ b/benchmarks/bm_mypy/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_mypy" +dependencies = [ + "mypy", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/mypy_bench_requirements.txt b/benchmarks/bm_mypy/requirements.txt similarity index 100% rename from benchmarks/mypy_bench_requirements.txt rename to benchmarks/bm_mypy/requirements.txt diff --git a/benchmarks/bm_mypy/run_benchmark.py b/benchmarks/bm_mypy/run_benchmark.py new file mode 100644 index 0000000..67e8833 --- /dev/null +++ b/benchmarks/bm_mypy/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path + +import pyperf +from mypy.main import main + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +""" +I tested it, and it looks like we get the same performance conclusions +when we run on the same file multiple times as if we run on a set of files once. + +So for convenience run on a single file multiple times. +""" +TARGETS = [ + os.path.join(DATADIR, "mypy_target.py"), +] + + +############################# +# benchmarks + +def bench_mypy(loops=20): + elapsed, _ = _bench_mypy(loops) + return elapsed + + +def _bench_mypy(loops=20, *, legacy=False): + """Meansure running mypy on a file N times. + + The target file is large (over 2300 lines) with extensive use + of type hints. + + Note that mypy's main() is called directly, which means + the measurement includes the time it takes to read the file + from disk. Also, all output is discarded (sent to /dev/null). + """ + elapsed = 0 + times = [] + with open(os.devnull, "w") as devnull: + for i in range(loops): + if legacy: + print(i) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long main() takes. 
+ t0 = pyperf.perf_counter() + try: + main(None, devnull, devnull, TARGETS) + except SystemExit: + pass + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_mypy, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of mypy types" + runner.bench_time_func("mypy", bench_mypy) diff --git a/data/pycparser_target/README b/benchmarks/bm_pycparser/data/pycparser_target/README similarity index 100% rename from data/pycparser_target/README rename to benchmarks/bm_pycparser/data/pycparser_target/README diff --git a/data/pycparser_target/redis.c.ppout b/benchmarks/bm_pycparser/data/pycparser_target/redis.c.ppout similarity index 100% rename from data/pycparser_target/redis.c.ppout rename to benchmarks/bm_pycparser/data/pycparser_target/redis.c.ppout diff --git a/data/pycparser_target/sqlite-btree.c.ppout b/benchmarks/bm_pycparser/data/pycparser_target/sqlite-btree.c.ppout similarity index 100% rename from data/pycparser_target/sqlite-btree.c.ppout rename to benchmarks/bm_pycparser/data/pycparser_target/sqlite-btree.c.ppout diff --git a/data/pycparser_target/tccgen.c.ppout b/benchmarks/bm_pycparser/data/pycparser_target/tccgen.c.ppout similarity index 100% rename from data/pycparser_target/tccgen.c.ppout rename to benchmarks/bm_pycparser/data/pycparser_target/tccgen.c.ppout diff --git a/benchmarks/bm_pycparser/legacyutils.py b/benchmarks/bm_pycparser/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_pycparser/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_pycparser/pyproject.toml b/benchmarks/bm_pycparser/pyproject.toml new file mode 100644 index 0000000..38020ba --- /dev/null +++ b/benchmarks/bm_pycparser/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_pycparser" +dependencies = [ + "pycparser", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/pycparser_bench_requirements.txt b/benchmarks/bm_pycparser/requirements.txt similarity index 100% rename from benchmarks/pycparser_bench_requirements.txt rename to benchmarks/bm_pycparser/requirements.txt diff --git a/benchmarks/bm_pycparser/run_benchmark.py b/benchmarks/bm_pycparser/run_benchmark.py new file mode 100644 index 0000000..eca4236 --- /dev/null +++ b/benchmarks/bm_pycparser/run_benchmark.py @@ -0,0 +1,79 @@ +import os +import os.path + +import pyperf +from pycparser import c_parser, c_ast + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +TARGET = os.path.join(DATADIR, "pycparser_target") + + +def _iter_files(rootdir=TARGET): + for name in os.listdir(rootdir): + if not name.endswith(".ppout"): + continue + filename = os.path.join(TARGET, name) + with open(filename) as f: + yield (filename, f.read()) + + +def parse_files(files): + for _, text in files: + # We use a new parser each time because CParser objects + # aren't designed for re-use. + parser = c_parser.CParser() + ast = parser.parse(text, '') + assert isinstance(ast, c_ast.FileAST) + + +############################# +# benchmarks + +def bench_pycparser(loops=20): + elapsed, _ = _bench_pycparser(loops) + return elapsed + + +def _bench_pycparser(loops=20): + """Measure running pycparser on several large C files N times. 
+ + The files are all relatively large, from well-known projects. + Each is already preprocessed. + + Only the CParser.parse() calls are measured. The following are not: + + * finding the target files + * reading them from disk + * creating the CParser object + """ + files = list(_iter_files()) + + elapsed = 0 + times = [] + for _ in range(loops): + times.append(pyperf.perf_counter()) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long parser.parse() takes. + t0 = pyperf.perf_counter() + parse_files(files) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_pycparser) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of pycparser" + runner.bench_time_func("pycparser", bench_pycparser) diff --git a/data/pylint_target/__init__.py b/benchmarks/bm_pylint/data/pylint_target/__init__.py similarity index 100% rename from data/pylint_target/__init__.py rename to benchmarks/bm_pylint/data/pylint_target/__init__.py diff --git a/data/pylint_target/dist.py b/benchmarks/bm_pylint/data/pylint_target/dist.py similarity index 100% rename from data/pylint_target/dist.py rename to benchmarks/bm_pylint/data/pylint_target/dist.py diff --git a/benchmarks/bm_pylint/legacyutils.py b/benchmarks/bm_pylint/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_pylint/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_pylint/pyproject.toml b/benchmarks/bm_pylint/pyproject.toml new file mode 100644 index 0000000..207eec9 --- /dev/null +++ b/benchmarks/bm_pylint/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_pylint" +dependencies = [ + "pylint", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/pylint_bench_requirements.txt b/benchmarks/bm_pylint/requirements.txt similarity index 100% rename from benchmarks/pylint_bench_requirements.txt rename to benchmarks/bm_pylint/requirements.txt diff --git a/benchmarks/bm_pylint/run_benchmark.py b/benchmarks/bm_pylint/run_benchmark.py new file mode 100644 index 0000000..40ad2c8 --- /dev/null +++ b/benchmarks/bm_pylint/run_benchmark.py @@ -0,0 +1,70 @@ +import os.path + +import pyperf +#from pylint import epylint as lint +from pylint.lint import Run + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +TARGETS = [ + os.path.join(DATADIR, "pylint_target", "dist.py"), +] + + +def noop(*args, **kw): + pass + + +class NullReporter: + path_strip_prefix = "/" + def __getattr__(self, attr): + return noop + + +############################# +# benchmarks + +def bench_pylint(loops=10): + elapsed, _ = _bench_pylint(loops) + return elapsed + + +def _bench_pylint(loops=10): + """Measure running pylint on a file N times. + + The target file is a relatively large, complex one copied + from distutils in the stdlib. + + pylint seems to speed up considerably as it progresses, and this + benchmark includes that. + """ + elapsed = 0 + times = [] + for i in range(loops): + print(i) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long Run() takes. 
+ t0 = pyperf.perf_counter() + reporter = NullReporter() + Run(TARGETS, exit=False, reporter=reporter) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_pylint) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of pylint" + runner.bench_time_func("pylint", bench_pylint) diff --git a/benchmarks/bm_pytorch_alexnet_inference/legacyutils.py b/benchmarks/bm_pytorch_alexnet_inference/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_pytorch_alexnet_inference/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_pytorch_alexnet_inference/pyproject.toml b/benchmarks/bm_pytorch_alexnet_inference/pyproject.toml new file mode 100644 index 0000000..1225fd2 --- /dev/null +++ b/benchmarks/bm_pytorch_alexnet_inference/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "bm_pytorch_alexnet_inference" +dependencies = [ + "torch", + "Pillow", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." diff --git a/benchmarks/pytorch_alexnet_inference_requirements.txt b/benchmarks/bm_pytorch_alexnet_inference/requirements.txt similarity index 74% rename from benchmarks/pytorch_alexnet_inference_requirements.txt rename to benchmarks/bm_pytorch_alexnet_inference/requirements.txt index d3cba68..49338ed 100644 --- a/benchmarks/pytorch_alexnet_inference_requirements.txt +++ b/benchmarks/bm_pytorch_alexnet_inference/requirements.txt @@ -2,3 +2,4 @@ future==0.18.2 numpy==1.19.0 Pillow==8.0.0 torch==1.5.1 +torchvision==0.6.1 diff --git a/benchmarks/bm_pytorch_alexnet_inference/run_benchmark.py b/benchmarks/bm_pytorch_alexnet_inference/run_benchmark.py new file mode 100644 index 0000000..5c46955 --- /dev/null +++ b/benchmarks/bm_pytorch_alexnet_inference/run_benchmark.py @@ -0,0 +1,90 @@ +import os +import os.path +import sys +import urllib.request + +import pyperf +from PIL import Image +import torch +from torchvision import transforms + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +if not os.path.exists(DATADIR): + os.mkdir(DATADIR) + +# TODO: Vendor this file (and the pytorch hub model) into the data dir, +# to avoid network access and to pin the data for consistent results. +URL = "https://github.com/pytorch/hub/raw/master/images/dog.jpg" +FILENAME = os.path.join(DATADIR, "dog.jpg") + + +############################# +# benchmarks + +def bench_pytorch(loops=1000): + elapsed, _ = _bench_pytorch(loops) + return elapsed + + +def _bench_pytorch(loops=1000, *, legacy=False): + """Measure using pytorch to transform an image N times. + + This involves the following steps: + + * load a pre-trained model (alexnet) + * mark it for evaluation + * download an image + * prepare it to be run through the model + * turn off gradients computation + * run the image through the model + + Only that last step is measured (and repeated N times). 
+ """ + start = pyperf.perf_counter() + model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True) + # assert pyperf.perf_counter() - start < 3, "looks like we just did the first-time download, run this benchmark again to get a clean run" + model.eval() + + urllib.request.urlretrieve(URL, FILENAME) + input_image = Image.open(FILENAME) + preprocess = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + + with torch.no_grad(): + elapsed = 0 + times = [] + for i in range(loops): + if legacy and (i % 10 == 0): + print(i) + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long model() takes. + t0 = pyperf.perf_counter() + output = model(input_batch) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_pytorch, legacyarg='legacy') + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of pytorch" + runner.bench_time_func("pytorch", bench_pytorch) diff --git a/data/Makefile b/benchmarks/bm_thrift/data/Makefile similarity index 100% rename from data/Makefile rename to benchmarks/bm_thrift/data/Makefile diff --git a/data/addressbook.thrift b/benchmarks/bm_thrift/data/addressbook.thrift similarity index 100% rename from data/addressbook.thrift rename to benchmarks/bm_thrift/data/addressbook.thrift diff --git a/data/thrift/__init__.py b/benchmarks/bm_thrift/data/thrift/__init__.py similarity index 100% rename from data/thrift/__init__.py rename to benchmarks/bm_thrift/data/thrift/__init__.py diff --git a/data/thrift/addressbook/__init__.py b/benchmarks/bm_thrift/data/thrift/addressbook/__init__.py similarity index 100% rename from data/thrift/addressbook/__init__.py rename to benchmarks/bm_thrift/data/thrift/addressbook/__init__.py diff --git a/data/thrift/addressbook/constants.py b/benchmarks/bm_thrift/data/thrift/addressbook/constants.py similarity index 100% rename from data/thrift/addressbook/constants.py rename to benchmarks/bm_thrift/data/thrift/addressbook/constants.py diff --git a/data/thrift/addressbook/ttypes.py b/benchmarks/bm_thrift/data/thrift/addressbook/ttypes.py similarity index 100% rename from data/thrift/addressbook/ttypes.py rename to benchmarks/bm_thrift/data/thrift/addressbook/ttypes.py diff --git a/benchmarks/bm_thrift/legacyutils.py b/benchmarks/bm_thrift/legacyutils.py new file mode 120000 index 0000000..644cca6 --- /dev/null +++ b/benchmarks/bm_thrift/legacyutils.py @@ -0,0 +1 @@ +../.libs/legacyutils.py \ No newline at end of file diff --git a/benchmarks/bm_thrift/pyproject.toml b/benchmarks/bm_thrift/pyproject.toml new file mode 100644 index 0000000..8d12b9e --- /dev/null +++ b/benchmarks/bm_thrift/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "bm_thrift" +dependencies = [ + "thrift", +] +dynamic = ["version"] + +[tool.pyperformance] +inherits = ".." 
diff --git a/benchmarks/thrift_bench_requirements.txt b/benchmarks/bm_thrift/requirements.txt similarity index 100% rename from benchmarks/thrift_bench_requirements.txt rename to benchmarks/bm_thrift/requirements.txt diff --git a/benchmarks/bm_thrift/run_benchmark.py b/benchmarks/bm_thrift/run_benchmark.py new file mode 100644 index 0000000..9b45efd --- /dev/null +++ b/benchmarks/bm_thrift/run_benchmark.py @@ -0,0 +1,96 @@ +# Adapted from https://raw.githubusercontent.com/Thriftpy/thriftpy2/master/benchmark/benchmark_apache_thrift_struct.py + +import os.path +import sys + +import pyperf +from thrift.TSerialization import serialize, deserialize +from thrift.protocol.TBinaryProtocol import ( + TBinaryProtocolFactory, + TBinaryProtocolAcceleratedFactory +) + + +DATADIR = os.path.join( + os.path.dirname(__file__), + "data", +) +# The target files were generated using the make file in the data dir. +TARGET = os.path.join(DATADIR, "thrift") + + +if TARGET not in sys.path: + sys.path.insert(0, TARGET) +from addressbook import ttypes + + +def make_addressbook(): + phone1 = ttypes.PhoneNumber() + phone1.type = ttypes.PhoneType.MOBILE + phone1.number = '555-1212' + phone2 = ttypes.PhoneNumber() + phone2.type = ttypes.PhoneType.HOME + phone2.number = '555-1234' + person = ttypes.Person() + person.name = "Alice" + person.phones = [phone1, phone2] + person.created_at = 1400000000 + + ab = ttypes.AddressBook() + ab.people = {person.name: person} + return ab + + +############################# +# benchmarks + +def bench_thrift(loops=1000): + elapsed, _ = _bench_thrift(loops) + return elapsed + + +def _bench_thrift(loops=1000): + """Measure using a thrift-generated library N times. + + The target is a simple addressbook. We measure the following: + + * create an addressbook with 1 person in it + * serialize it + * deserialize it into a new addressbook + + For each iteration we repeat this 100 times. + """ + # proto_factory = TBinaryProtocolFactory() + proto_factory = TBinaryProtocolAcceleratedFactory() + + elapsed = 0 + times = [] + for _ in range(loops): + # This is a macro benchmark for a Python implementation + # so "elapsed" covers more than just how long the Addressbook ops take. + t0 = pyperf.perf_counter() + for _ in range(100): + # First, create the addressbook. + ab = make_addressbook() + # Then, round-trip through serialization. + encoded = serialize(ab, proto_factory) + ab2 = ttypes.AddressBook() + deserialize(ab2, encoded, proto_factory) + t1 = pyperf.perf_counter() + + elapsed += t1 - t0 + times.append(t0) + times.append(pyperf.perf_counter()) + return elapsed, times + + +############################# +# the script + +if __name__ == "__main__": + from legacyutils import maybe_handle_legacy + maybe_handle_legacy(_bench_thrift) + + runner = pyperf.Runner() + runner.metadata['description'] = "Test the performance of thrift" + runner.bench_time_func("thrift", bench_thrift) diff --git a/benchmarks/djangocms.py b/benchmarks/djangocms.py deleted file mode 100644 index af0d3ca..0000000 --- a/benchmarks/djangocms.py +++ /dev/null @@ -1,129 +0,0 @@ -""" -Django-cms test -Sets up a djangocms installation, and hits '/' a number of times. -'/' is not super interesting, but it still exercises a little bit of -functionality; looking at cms/templates/cms/welcome.html, it seems -to do a decent amount of template logic, as well as do some basic -user auth. -We could probably improve the flow though, perhaps by logging in -and browsing around. 
-""" - -import os -import requests -import socket -import subprocess -import sys -import tempfile -import time -import json - -def setup(): - """ - Set up a djangocms installation. - Runs the initial bootstrapping without the db migration, - so that we can turn off sqlite synchronous and avoid fs time. - Rough testing shows that setting synchronous=OFF is basically - the same performance as running on /dev/shm - """ - - subprocess.check_call([exe.replace("python3", "djangocms"), "testsite", "--verbose", "--no-sync"]) - - with open("testsite/testsite/settings.py", "a") as f: - f.write(""" -from django.db.backends.signals import connection_created -def set_no_sychronous(sender, connection, **kwargs): - if connection.vendor == 'sqlite': - cursor = connection.cursor() - cursor.execute('PRAGMA synchronous = OFF;') - -connection_created.connect(set_no_sychronous) -""") - start = time.time() - subprocess.check_call([exe, "manage.py", "migrate"], cwd="testsite") - elapsed = time.time() - start - print("%.2fs to initialize db" % (elapsed,)) - -def waitUntilUp(addr, timeout=10.0): - start = time.time() - while True: - try: - with socket.create_connection(addr) as sock: - return - except ConnectionRefusedError: - if time.time() > start + timeout: - raise Exception("Timeout reached when trying to connect") - time.sleep(0.001) - -def runbenchmark(n=800, out_file=None): - p = subprocess.Popen([exe, "manage.py", "runserver", "--noreload"], cwd="testsite", stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - try: - waitUntilUp(("127.0.0.1", 8000)) - - start = time.time() - times = [] - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8000/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - exitcode = p.poll() - assert exitcode is None, exitcode - - if out_file: - json.dump(times, open(out_file, 'w')) - - finally: - p.terminate() - p.wait() - -if __name__ == "__main__": - exe = sys.executable - # Hack: make sure this file gets run as "python3" so that perf will collate across different processes - if not exe.endswith('3'): - os.execv(exe + '3', [exe + '3'] + sys.argv) - - os.environ["PATH"] = os.path.dirname(exe) + ":" + os.environ["PATH"] - - """ - Usage: - python djangocms.py - python djangocms.py --setup DIR - python djangocms.py --serve DIR - - The first form creates a temporary directory, sets up djangocms in it, - serves out of it, and removes the directory. - The second form sets up a djangocms installation in the given directory. - The third form runs a benchmark out of an already-set-up directory - The second and third forms are useful if you want to benchmark the - initial migration phase separately from the second serving phase. - """ - if "--setup" in sys.argv: - assert len(sys.argv) > 2 - dir = sys.argv[-1] - os.makedirs(dir, exist_ok=True) - os.chdir(dir) - setup() - elif "--serve" in sys.argv: - assert len(sys.argv) > 2 - os.chdir(sys.argv[-1]) - runbenchmark() - else: - n = 800 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - out_file = None - if len(sys.argv) > 2: - out_file = os.path.abspath(sys.argv[2]) - - # It might be interesting to put the temporary directory in /dev/shm, - # which makes the initial db migration about 20% faster. 
- with tempfile.TemporaryDirectory(prefix="djangocms_test_") as d: - os.chdir(d) - - setup() - runbenchmark(n, out_file) diff --git a/benchmarks/flaskblogging.py b/benchmarks/flaskblogging.py deleted file mode 100644 index 40fa891..0000000 --- a/benchmarks/flaskblogging.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time - -from djangocms import waitUntilUp - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - p = subprocess.Popen([exe, "../data/flaskblogging_serve.py"], stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT, cwd=os.path.dirname(__file__)) - try: - waitUntilUp(("127.0.0.1", 8000)) - - n = 1800 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8000/blog/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p.poll() is None, p.poll() - - finally: - p.terminate() - p.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/gevent_bench_hub.py b/benchmarks/gevent_bench_hub.py deleted file mode 100644 index 7e27737..0000000 --- a/benchmarks/gevent_bench_hub.py +++ /dev/null @@ -1,122 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Benchmarks for hub primitive operations. - -Taken from https://github.com/gevent/gevent/blob/master/benchmarks/bench_hub.py -Modified to remove perf and not need any command line arguments -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# import perf -# from perf import perf_counter - -import gevent -from greenlet import greenlet -from greenlet import getcurrent - - -N = 1000 - -def bench_switch(): - - class Parent(type(gevent.get_hub())): - def run(self): - parent = self.parent - for _ in range(N): - parent.switch() - - def child(): - parent = getcurrent().parent - # Back to the hub, which in turn goes - # back to the main greenlet - for _ in range(N): - parent.switch() - - hub = Parent(None, None) - child_greenlet = greenlet(child, hub) - for _ in range(N): - child_greenlet.switch() - -def bench_wait_ready(): - - class Watcher(object): - def start(self, cb, obj): - # Immediately switch back to the waiter, mark as ready - cb(obj) - - def stop(self): - pass - - watcher = Watcher() - hub = gevent.get_hub() - - for _ in range(1000): - hub.wait(watcher) - -def bench_cancel_wait(): - - class Watcher(object): - active = True - callback = object() - - def close(self): - pass - - watcher = Watcher() - hub = gevent.get_hub() - loop = hub.loop - - for _ in range(1000): - # Schedule all the callbacks. - hub.cancel_wait(watcher, None, True) - - # Run them! 
- for cb in loop._callbacks: - if cb.callback: - cb.callback(*cb.args) - cb.stop() # so the real loop won't do it - - # destroy the loop so we don't keep building these functions - # up - hub.destroy(True) - -def bench_wait_func_ready(): - from gevent.hub import wait - class ToWatch(object): - def rawlink(self, cb): - cb(self) - - watched_objects = [ToWatch() for _ in range(N)] - - t0 = perf_counter() - - wait(watched_objects) - - return perf_counter() - t0 - -def main(): - - runner = perf.Runner() - - runner.bench_func('multiple wait ready', - bench_wait_func_ready, - inner_loops=N) - - runner.bench_func('wait ready', - bench_wait_ready, - inner_loops=N) - - runner.bench_func('cancel wait', - bench_cancel_wait, - inner_loops=N) - - runner.bench_func('switch', - bench_switch, - inner_loops=N) - -if __name__ == '__main__': - # main() - for i in range(10000): - bench_switch() diff --git a/benchmarks/gunicorn.py b/benchmarks/gunicorn.py deleted file mode 100644 index aa39d5f..0000000 --- a/benchmarks/gunicorn.py +++ /dev/null @@ -1,41 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time - -from djangocms import waitUntilUp - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - p = subprocess.Popen([os.path.join(os.path.dirname(exe), "gunicorn"), "gunicorn_serve:main", "--bind", "127.0.0.1:8000", "-w", "1", "--worker-class", "aiohttp.GunicornWebWorker"], stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT, cwd=os.path.join(os.path.dirname(__file__), "../data")) - try: - waitUntilUp(("127.0.0.1", 8000)) - - n = 3000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - requests.get("http://localhost:8000/blog/").text - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p.poll() is None, p.poll() - - finally: - p.terminate() - p.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/json_bench.py b/benchmarks/json_bench.py deleted file mode 100644 index c762cd9..0000000 --- a/benchmarks/json_bench.py +++ /dev/null @@ -1,31 +0,0 @@ -import json -import os -import sys -import time - -if __name__ == "__main__": - exe = sys.executable - - times = [] - - with open(os.path.join(os.path.dirname(__file__), "../data/reddit_comments.json")) as f: - s = f.read() - - data = s.split('\n') - - n = 400 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - times = [] - - for i in range(n): - times.append(time.time()) - for s in data: - if not s: - continue - json.loads(s) - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/kinto_bench.py b/benchmarks/kinto_bench.py deleted file mode 100644 index ced23d7..0000000 --- a/benchmarks/kinto_bench.py +++ /dev/null @@ -1,75 +0,0 @@ -import json -import os -import requests -import subprocess -import sys -import threading -import time -import urllib - -from djangocms import waitUntilUp - -from os.path import join, abspath, dirname - -if __name__ == "__main__": - exe = sys.executable - def bin(name): - return join(dirname(exe), name) - def rel(path): - return abspath(join(dirname(__file__), path)) - - times = [] - - subprocess.check_call([abspath(exe), rel("../data/kinto_project/setup.py"), "develop"], cwd=rel("../data/kinto_project"), stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - - 
try: - os.remove("/tmp/kinto.sock") - except FileNotFoundError: - pass - p1 = subprocess.Popen([bin("uwsgi"), rel("../data/kinto_project/production.ini")], cwd=rel("../data/kinto_project"), stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - # p1 = subprocess.Popen([bin("uwsgi"), rel("../data/kinto_project/production.ini")], cwd=rel("../data/kinto_project")) - while not os.path.exists("/tmp/kinto.sock"): - time.sleep(0.001) - - # p2 = subprocess.Popen(["nginx", "-c", abspath("../data/kinto_project/nginx.conf"), "-p", abspath("../data/kinto_project")], cwd="../data/kinto_project", stdout=open("/dev/null", "w"), stderr=subprocess.STDOUT) - p2 = subprocess.Popen(["nginx", "-c", rel("../data/kinto_project/nginx.conf"), "-p", rel("../data/kinto_project")], cwd=rel("../data/kinto_project")) - - time.sleep(0.010) - - try: - waitUntilUp(("127.0.0.1", 8000)) - - assert p1.poll() is None, p1.poll() - assert p2.poll() is None, p2.poll() - - print(requests.get("http://localhost:8000/v1").text) - # print(requests.put("http://localhost:8000/v1/accounts/testuser", json={"data": {"password": "password1"}}).text) - - n = 5000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - start = time.time() - for i in range(n): - times.append(time.time()) - if i % 100 == 0: - print(i, time.time() - start) - # requests.get("http://localhost:8000/v1/").text - urllib.request.urlopen("http://localhost:8000/v1/").read() - times.append(time.time()) - elapsed = time.time() - start - print("%.2fs (%.3freq/s)" % (elapsed, n / elapsed)) - - assert p1.poll() is None, p1.poll() - assert p2.poll() is None, p2.poll() - - finally: - p1.terminate() - p1.kill() - p1.wait() - # p2.kill() - p2.terminate() - p2.wait() - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/mypy_bench.py b/benchmarks/mypy_bench.py deleted file mode 100644 index b704e8c..0000000 --- a/benchmarks/mypy_bench.py +++ /dev/null @@ -1,33 +0,0 @@ -import json -import os -import sys -import time - -""" -I tested it, and it looks like we get the same performance conclusions -when we run on the same file multiple times as if we run on a set of files once. - -So for convenience run on a single file multiple times. 
-""" - -if __name__ == "__main__": - from mypy.main import main - - n = 20 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - target = os.path.join(os.path.dirname(__file__), "../data/mypy_target.py") - - times = [] - devnull = open("/dev/null", "w") - for i in range(n): - times.append(time.time()) - print(i) - try: - main(None, devnull, devnull, [target]) - except SystemExit: - pass - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/pycparser_bench.py b/benchmarks/pycparser_bench.py deleted file mode 100644 index 9a01cf4..0000000 --- a/benchmarks/pycparser_bench.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import os -import sys -import time - -from pycparser import c_parser, c_ast - -def parse_files(files): - for code in files: - parser = c_parser.CParser() - ast = parser.parse(code, '') - assert isinstance(ast, c_ast.FileAST) - -if __name__ == "__main__": - n = 20 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - files = [] - directory = os.path.abspath(__file__ + "/../../data/pycparser_target") - for filename in os.listdir(directory): - filename = os.path.join(directory, filename) - if not filename.endswith(".ppout"): - continue - with open(filename) as f: - files.append(f.read()) - - times = [] - for i in range(n): - times.append(time.time()) - - parse_files(files) - - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/pylint_bench.py b/benchmarks/pylint_bench.py deleted file mode 100644 index 6736b88..0000000 --- a/benchmarks/pylint_bench.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import os -import subprocess -import sys -import time - -from pylint import epylint as lint -from pylint.lint import Run - -""" -pylint benchmark - -pylint seems to speed up considerably as it progresses, and this -benchmark includes that -""" - -if __name__ == "__main__": - def noop(*args, **kw): - pass - class NullReporter: - path_strip_prefix = "/" - def __getattr__(self, attr): - return noop - - n = 10 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - times = [] - for i in range(n): - times.append(time.time()) - print(i) - Run([os.path.join(os.path.dirname(__file__), "../data/pylint_target/dist.py")], exit=False, reporter=NullReporter()) - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/pytorch_alexnet_inference.py b/benchmarks/pytorch_alexnet_inference.py deleted file mode 100644 index 0ccf648..0000000 --- a/benchmarks/pytorch_alexnet_inference.py +++ /dev/null @@ -1,43 +0,0 @@ -import json -import time -import torch -import urllib -import sys - -if __name__ == "__main__": - start = time.time() - model = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True) - # assert time.time() - start < 3, "looks like we just did the first-time download, run this benchmark again to get a clean run" - model.eval() - - url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg") - urllib.request.urlretrieve(url, filename) - - from PIL import Image - from torchvision import transforms - input_image = Image.open(filename) - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) - input_tensor = preprocess(input_image) - input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model - - n = 1000 - if 
len(sys.argv) > 1: - n = int(sys.argv[1]) - - with torch.no_grad(): - times = [] - for i in range(n): - times.append(time.time()) - if i % 10 == 0: - print(i) - output = model(input_batch) - times.append(time.time()) - print((len(times) - 1) / (times[-1] - times[0]) , "/s") - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) diff --git a/benchmarks/thrift_bench.py b/benchmarks/thrift_bench.py deleted file mode 100644 index 68d5a69..0000000 --- a/benchmarks/thrift_bench.py +++ /dev/null @@ -1,58 +0,0 @@ -# Adapted from https://raw.githubusercontent.com/Thriftpy/thriftpy2/master/benchmark/benchmark_apache_thrift_struct.py - -import json -import time - -from thrift.TSerialization import serialize, deserialize -from thrift.protocol.TBinaryProtocol import ( - TBinaryProtocolFactory, - TBinaryProtocolAcceleratedFactory -) - -import os -import sys -sys.path.append(os.path.join(os.path.dirname(__file__), "../data/thrift")) -from addressbook import ttypes - - -def make_addressbook(): - phone1 = ttypes.PhoneNumber() - phone1.type = ttypes.PhoneType.MOBILE - phone1.number = '555-1212' - phone2 = ttypes.PhoneNumber() - phone2.type = ttypes.PhoneType.HOME - phone2.number = '555-1234' - person = ttypes.Person() - person.name = "Alice" - person.phones = [phone1, phone2] - person.created_at = 1400000000 - - ab = ttypes.AddressBook() - ab.people = {person.name: person} - return ab - - -def main(): - # proto_factory = TBinaryProtocolFactory() - proto_factory = TBinaryProtocolAcceleratedFactory() - - n = 1000 - if len(sys.argv) > 1: - n = int(sys.argv[1]) - - times = [] - - for i in range(n): - times.append(time.time()) - for j in range(100): - ab = make_addressbook() - encoded = serialize(ab, proto_factory) - ab2 = ttypes.AddressBook() - deserialize(ab2, encoded, proto_factory) - times.append(time.time()) - - if len(sys.argv) > 2: - json.dump(times, open(sys.argv[2], 'w')) - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3901717 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[project] +name = "python-macrobenchmarks" +version = "0.9.0" # XXX an arbitrary value; the repo doesn't have one +description = "Pyston benchmarks" +#requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/pyston/python-macrobenchmarks"} + +[tool.pyperformance] +manifest = "benchmarks/MANIFEST" diff --git a/run_all.sh b/run_all.sh index 27df37b..c1eeafb 100755 --- a/run_all.sh +++ b/run_all.sh @@ -13,9 +13,22 @@ set -x mkdir -p results ENV=/tmp/macrobenchmark_env -for bench in flaskblogging djangocms mypy_bench pylint_bench pycparser_bench pytorch_alexnet_inference gunicorn aiohttp thrift_bench gevent_bench_hub kinto_bench; do +for bench in flaskblogging djangocms mypy pylint pycparser pytorch_alexnet_inference gunicorn aiohttp thrift gevent_hub kinto; do + case $bench in + gevent_hub) + outname=gevent_bench_hub + ;; + mypy|pylint|pycparser|thrift|kinto) + outname=${bench}_bench + ;; + *) + outname=$bench + ;; + esac + rm -rf $ENV $BINARY -m venv $ENV - $ENV/bin/pip install -r $(dirname $0)/benchmarks/${bench}_requirements.txt - /usr/bin/time --verbose --output=results/${bench}.out $ENV/bin/python $(dirname $0)/benchmarks/${bench}.py + $ENV/bin/pip install pyperf==2.2.0 + $ENV/bin/pip install -r $(dirname $0)/benchmarks/bm_${bench}/requirements.txt + /usr/bin/time --verbose --output=results/${outname}.out $ENV/bin/python $(dirname $0)/benchmarks/bm_${bench}/run_benchmark.py --legacy done diff --git 
a/run_mypy.sh b/run_mypy.sh index 363fc7c..d030b5f 100644 --- a/run_mypy.sh +++ b/run_mypy.sh @@ -24,13 +24,14 @@ rm -rf /tmp/mypy git clone --depth 1 --branch v0.790 https://github.com/python/mypy/ /tmp/mypy cd /tmp/mypy +$ENV/bin/pip install pyperf==2.2.0 $ENV/bin/pip install -r mypy-requirements.txt $ENV/bin/pip install --upgrade setuptools git submodule update --init mypy/typeshed $ENV/bin/python setup.py --use-mypyc install cd - -time $ENV/bin/python benchmarks/mypy_bench.py 50 -time $ENV/bin/python benchmarks/mypy_bench.py 50 -time $ENV/bin/python benchmarks/mypy_bench.py 50 +time $ENV/bin/python benchmarks/bm_mypy/run_benchmark.py --legacy 50 +time $ENV/bin/python benchmarks/bm_mypy/run_benchmark.py --legacy 50 +time $ENV/bin/python benchmarks/bm_mypy/run_benchmark.py --legacy 50
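As the updated run_all.sh and run_mypy.sh show, the converted scripts can still be driven in legacy mode with an explicit loop count. In that mode the `_bench_*` helpers above build a flat list of timestamps: one entry taken at the start of every iteration plus a final entry taken after the last iteration finishes. A minimal sketch of post-processing such a list into per-iteration durations (the file path is hypothetical; a legacy run only writes such a file when an output path is supplied):

```python
import json

# Hypothetical output file from a legacy run; purely illustrative.
with open("results/thrift_bench.json") as f:
    times = json.load(f)

# Consecutive timestamps bracket one iteration each: times[:-1] are the
# per-iteration start times and times[-1] is taken after the last iteration.
durations = [t1 - t0 for t0, t1 in zip(times, times[1:])]
print(f"{len(durations)} iterations, {sum(durations):.2f}s total")
```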