Skip to content

Commit 5b23a24

Browse files
committed
benchmark| Adds basic performance benchmark baselines for pylint
Here we establish baseline benchmarks for the system when used in a minimal way. We simply confirm that -j1 vs -jN gives some boost in performance under simple situations, establishing a baseline for other benchmarks.
1 parent cfbb933 commit 5b23a24

File tree

2 files changed

+322
-0
lines changed

2 files changed

+322
-0
lines changed
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
""" Profiles basic -jX functionality """
2+
# Copyright (c) 2020 Frank Harrison <[email protected]>
3+
4+
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
5+
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING
6+
7+
# pylint: disable=protected-access,missing-function-docstring,no-self-use
8+
9+
import os
10+
import pprint
11+
import time
12+
13+
import pytest
14+
15+
import pylint.interfaces
16+
from pylint.checkers.base_checker import BaseChecker
17+
from pylint.lint import PyLinter, Run, check_parallel
18+
from pylint.testutils import TestReporter as Reporter
19+
from pylint.utils import register_plugins
20+
21+
22+
def _empty_filepath():
    """Return the absolute path of the minimal input file used by the benchmarks.

    The file is located at ``../input/benchmark_minimal_file.py`` relative to the
    directory containing this module; the result is normalised by
    ``os.path.abspath`` so it contains no ``..`` components.
    """
    here = os.path.dirname(__file__)
    relative = os.path.join(here, "..", "input", "benchmark_minimal_file.py")
    return os.path.abspath(relative)
28+
29+
30+
class SleepingChecker(BaseChecker):
    """Raw checker whose only "work" is to sleep for a fixed time per module.

    Since the amount of "work" is roughly constant, the wall-clock time should
    drop as workers are added, and any variance is likely to come from the
    pylint system itself.
    """

    __implements__ = (pylint.interfaces.IRawChecker,)

    name = "sleeper"
    msgs = {
        "R9999": (
            "Test",
            "test-check",
            "Some helpful text.",
        )
    }
    # Seconds to pretend we are doing work for, on every processed module
    sleep_duration = 0.5

    def process_module(self, _astroid):
        """Block for `sleep_duration` seconds; the module itself is ignored.

        Each file therefore costs ~`sleep_duration` plus framework overhead.
        """
        pause = self.sleep_duration
        time.sleep(pause)
47+
48+
49+
class SleepingCheckerLong(BaseChecker):
    """Raw checker registered as "long-sleeper" that sleeps once per module.

    Like any constant-cost checker, its wall-clock time should drop as workers
    are added, with any variance likely to come from the pylint system itself.

    NOTE(review): despite the name, `sleep_duration` is the same 0.5 seconds
    used by `SleepingChecker` -- confirm whether a longer sleep was intended.
    """

    __implements__ = (pylint.interfaces.IRawChecker,)

    name = "long-sleeper"
    msgs = {
        "R9999": (
            "Test",
            "test-check",
            "Some helpful text.",
        )
    }
    # Seconds to pretend we are doing work for, on every processed module
    sleep_duration = 0.5

    def process_module(self, _astroid):
        """Block for `sleep_duration` seconds; the module itself is ignored.

        Each file therefore costs ~`sleep_duration` plus framework overhead.
        """
        pause = self.sleep_duration
        time.sleep(pause)
66+
67+
68+
class NoWorkChecker(BaseChecker):
    """A checker that does no work at all.

    `process_module` is a no-op, so registering this checker only adds the cost
    of the framework dispatching to one extra checker for every module.
    """

    __implements__ = (pylint.interfaces.IRawChecker,)

    # NOTE(review): this reuses the name "sleeper" from `SleepingChecker` even
    # though nothing sleeps here; it is kept as-is because the benchmark
    # assertion messages refer to 'sleeper'.
    name = "sleeper"
    msgs = {
        "R9999": (
            "Test",
            "test-check",
            "Some helpful text.",
        )
    }

    def process_module(self, _astroid):
        """Intentionally do nothing for the given module."""
78+
79+
80+
@pytest.mark.benchmark(group="baseline")
class TestEstablishBaselineBenchmarks:
    """Naive benchmarks for the high-level pylint framework.

    These benchmark the fundamental, common parts of the system, so any change
    seen here will impact everything else.
    """

    empty_filepath = _empty_filepath()
    # Presumably the (name, filepath, modname) triple `check_parallel` expects
    # for each file -- confirm against `check_parallel`.
    empty_file_info = (
        "name-emptyfile-file",
        empty_filepath,
        "modname-emptyfile-mod",
    )
    lot_of_files = 500

    # Messages printed (through the benchmark fixture) when a test bails out
    _SKIP_LARGE_FILE_COUNTS = "skipping, only benchmark large file counts"
    _SKIP_SLEEPING = "skipping, do not want to sleep in main tests"

    @staticmethod
    def _skip_unless_benchmarking(benchmark, reason):
        """Return True when benchmarking is disabled and the test should bail out.

        When `benchmark.disabled` is set, the fixture is still invoked once with
        `print(reason)` before returning True. Presumably this keeps
        pytest-benchmark from complaining about an unused fixture -- TODO confirm.
        """
        if not benchmark.disabled:
            return False
        benchmark(print, reason)
        return True

    @staticmethod
    def _assert_no_messages(linter):
        """Assert that `linter` finished with a zero `msg_status`.

        On failure the messages collected by the linter's reporter are included
        in the assertion message.
        """
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_benchmark_j1(self, benchmark):
        """Establish a baseline of pylint performance with no work.

        We will add extra Checkers in other benchmarks.

        Because this is so simple, if this regresses something very serious has
        happened.
        """
        linter = PyLinter(reporter=Reporter())
        fileinfos = [self.empty_filepath]  # Single file to end-to-end the system
        assert linter.config.jobs == 1
        assert len(linter._checkers) == 1, "Should just have 'master'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_benchmark_j10(self, benchmark):
        """Establish a baseline of pylint performance with no work across workers.

        Same as `test_baseline_benchmark_j1` but we use -j10 with 10 fake files
        to ensure the end-to-end system is invoked.

        Because this is also so simple, if this regresses something very serious
        has happened.
        """
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10

        # Create file per worker, using all workers
        fileinfos = [self.empty_filepath] * linter.config.jobs

        assert linter.config.jobs == 10
        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_benchmark_check_parallel_j10(self, benchmark):
        """Should demonstrate times very close to `test_baseline_benchmark_j10`."""
        jobs = 10
        linter = PyLinter(reporter=Reporter())

        # Create file per worker, using all workers. The count must come from
        # `jobs`, not `linter.config.jobs`: the latter is never set here, and
        # the j1 baseline shows it is 1 on a fresh linter, which would yield a
        # single file.
        fileinfos = [self.empty_file_info] * jobs

        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(check_parallel, linter, jobs=jobs, files=fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_lots_of_files_j1(self, benchmark):
        """Establish a baseline with only 'master' checker being run in -j1.

        We do not register any checkers except the default 'master', so the cost
        is just that of the system with a lot of files registered.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_LARGE_FILE_COUNTS):
            return  # _only_ run this test if profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 1
        fileinfos = [self.empty_filepath] * self.lot_of_files
        assert linter.config.jobs == 1
        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_lots_of_files_j10(self, benchmark):
        """Establish a baseline with only 'master' checker being run in -j10.

        As with the -j1 variant above, `test_baseline_lots_of_files_j1`, we do
        not register any checkers except the default 'master', so the cost is
        just that of the check_parallel system across 10 workers, plus the
        overhead of PyLinter.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_LARGE_FILE_COUNTS):
            return  # _only_ run this test if profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10
        fileinfos = [self.empty_filepath] * self.lot_of_files
        assert linter.config.jobs == 10
        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_lots_of_files_j1_empty_checker(self, benchmark):
        """Baseline pylint for a single extra checker run in -j1, for N files.

        We use a checker that does no work, so the cost is just that of the
        system at scale.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_LARGE_FILE_COUNTS):
            return  # _only_ run this test if profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 1
        linter.register_checker(NoWorkChecker(linter))
        fileinfos = [self.empty_filepath] * self.lot_of_files
        assert linter.config.jobs == 1
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_lots_of_files_j10_empty_checker(self, benchmark):
        """Baseline pylint for a single extra checker run in -j10, for N files.

        We use a checker that does no work, so the cost is just that of the
        system at scale, across workers.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_LARGE_FILE_COUNTS):
            return  # _only_ run this test if profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10
        linter.register_checker(NoWorkChecker(linter))
        fileinfos = [self.empty_filepath] * self.lot_of_files
        assert linter.config.jobs == 10
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_benchmark_j1_single_working_checker(self, benchmark):
        """Establish a baseline of single-worker performance for PyLinter.

        Here we mimic a single Checker that does some work so that we can see
        the impact of running a simple system with -j1 against the same system
        with -j10.

        We expect this benchmark to take very close to
        `numfiles*SleepingChecker.sleep_duration`.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_SLEEPING):
            return  # _only_ run this test if profiling
        linter = PyLinter(reporter=Reporter())
        linter.register_checker(SleepingChecker(linter))

        # Check the same number of files as
        # `test_baseline_benchmark_j10_single_working_checker`
        fileinfos = [self.empty_filepath] * 10

        assert linter.config.jobs == 1
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_benchmark_j10_single_working_checker(self, benchmark):
        """Establish a baseline of multi-worker performance for PyLinter/check_parallel.

        We expect this benchmark to take less time than
        `test_baseline_benchmark_j1_single_working_checker`, roughly
        `error_margin*(1/J)*(numfiles*SleepingChecker.sleep_duration)`.

        Because of the cost of the framework and system the performance
        difference will *not* be 1/10 of the -j1 version.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_SLEEPING):
            return  # _only_ run this test if profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10
        linter.register_checker(SleepingChecker(linter))

        # Check the same number of files as
        # `test_baseline_benchmark_j1_single_working_checker`
        fileinfos = [self.empty_filepath] * 10

        assert linter.config.jobs == 10
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        self._assert_no_messages(linter)

    def test_baseline_benchmark_j1_all_checks_single_file(self, benchmark):
        """Run a single file, with -j1, against all plug-ins.

        ... that's the intent at least.
        """
        # Just 1 file, but all Checkers/Extensions
        fileinfos = [self.empty_filepath]

        runner = benchmark(Run, fileinfos, reporter=Reporter(), do_exit=False)
        assert runner.linter.config.jobs == 1
        print("len(runner.linter._checkers)", len(runner.linter._checkers))
        assert len(runner.linter._checkers) > 1, "Should have more than 'master'"

        self._assert_no_messages(runner.linter)

    def test_baseline_benchmark_j1_all_checks_lots_of_files(self, benchmark):
        """Run lots of files, with -j1, against all plug-ins.

        ... that's the intent at least.
        """
        if self._skip_unless_benchmarking(benchmark, self._SKIP_LARGE_FILE_COUNTS):
            return  # _only_ run this test if profiling
        linter = PyLinter()

        # Register all checkers/extensions and enable them; plug-ins are looked
        # up in the directory two levels above this file.
        plugin_root = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "..")
        )
        register_plugins(linter, plugin_root)
        linter.load_default_plugins()
        linter.enable("all")

        # Lots of files, and all Checkers/Extensions
        fileinfos = [self.empty_filepath] * self.lot_of_files

        assert linter.config.jobs == 1
        print("len(linter._checkers)", len(linter._checkers))
        assert len(linter._checkers) > 1, "Should have more than 'master'"
        # NOTE(review): unlike the other benchmarks, the resulting msg_status is
        # not asserted here -- confirm whether that is deliberate.
        benchmark(linter.check, fileinfos)

tests/input/benchmark_minimal_file.py

Whitespace-only changes.

0 commit comments

Comments
 (0)