-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
benchmark| Adds basic performance benchmark baselines for pylint
Here we establish baseline benchmarks for the system when used in a minimal way. We confirm only that -j1 vs -jN gives some boost in performance in simple situations, establishing a baseline for other benchmarks.
- Loading branch information
1 parent
cfbb933
commit 5b23a24
Showing
2 changed files
with
322 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,322 @@ | ||
""" Profiles basic -jX functionality """ | ||
# Copyright (c) 2020 Frank Harrison <[email protected]> | ||
|
||
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html | ||
# For details: https://github.com/PyCQA/pylint/blob/master/COPYING | ||
|
||
# pylint: disable=protected-access,missing-function-docstring,no-self-use | ||
|
||
import os | ||
import pprint | ||
import time | ||
|
||
import pytest | ||
|
||
import pylint.interfaces | ||
from pylint.checkers.base_checker import BaseChecker | ||
from pylint.lint import PyLinter, Run, check_parallel | ||
from pylint.testutils import TestReporter as Reporter | ||
from pylint.utils import register_plugins | ||
|
||
|
||
def _empty_filepath():
    """Return the absolute path of the shared (minimal) benchmark input file."""
    here = os.path.dirname(__file__)
    relative = os.path.join(here, "..", "input", "benchmark_minimal_file.py")
    return os.path.abspath(relative)
|
||
|
||
class SleepingChecker(BaseChecker):
    """A raw checker that sleeps a fixed time per module.

    The wall-clock time of a run should reduce as we add workers: as we apply
    a roughly constant amount of "work" in this checker, any variance is
    likely to be caused by the pylint system itself."""

    # Registered as a raw checker so process_module() is called once per file.
    __implements__ = (pylint.interfaces.IRawChecker,)

    name = "sleeper"
    # Minimal message table; a checker must declare at least one message.
    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
    sleep_duration = 0.5  # the time to pretend we're doing work for

    def process_module(self, _astroid):
        """Sleeps for `sleep_duration` on each call.

        This effectively means each file costs ~`sleep_duration` plus the
        framework overhead."""
        time.sleep(self.sleep_duration)
|
||
|
||
class SleepingCheckerLong(BaseChecker):
    """A raw checker that sleeps a fixed time per module.

    The wall-clock time of a run should reduce as we add workers: as we apply
    a roughly constant amount of "work" in this checker, any variance is
    likely to be caused by the pylint system itself.

    NOTE(review): currently identical to SleepingChecker (same 0.5s sleep)
    apart from its registered name — presumably intended for benchmarks that
    scale the total sleep differently; confirm before changing."""

    # Registered as a raw checker so process_module() is called once per file.
    __implements__ = (pylint.interfaces.IRawChecker,)

    name = "long-sleeper"
    # Minimal message table; a checker must declare at least one message.
    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}
    sleep_duration = 0.5  # the time to pretend we're doing work for

    def process_module(self, _astroid):
        """Sleeps for `sleep_duration` on each call.

        This effectively means each file costs ~`sleep_duration` plus the
        framework overhead."""
        time.sleep(self.sleep_duration)
|
||
|
||
class NoWorkChecker(BaseChecker):
    """A raw checker that does nothing per module.

    Any wall-clock change as threads/workers are added is therefore pure
    framework overhead."""

    # Registered as a raw checker so process_module() is called once per file.
    __implements__ = (pylint.interfaces.IRawChecker,)

    name = "sleeper"
    # Minimal message table; a checker must declare at least one message.
    msgs = {"R9999": ("Test", "test-check", "Some helpful text.",)}

    def process_module(self, _astroid):
        # Deliberately a no-op: benchmarks using this checker measure only
        # the cost of invoking a checker, not any work inside it.
        pass
|
||
|
||
@pytest.mark.benchmark(group="baseline",)
class TestEstablishBaselineBenchmarks:
    """Naive benchmarks for the high-level pylint framework.

    Because this benchmarks the fundamental and common parts, changes seen
    here will impact everything else."""

    # Absolute path of the (minimal) module linted by every benchmark.
    empty_filepath = _empty_filepath()
    # (name, filepath, modname) triple, the shape check_parallel expects in `files`.
    empty_file_info = (
        "name-emptyfile-file",
        _empty_filepath(),
        "modname-emptyfile-mod",
    )
    # File count used by the "lots of files" benchmarks below.
    lot_of_files = 500

    def test_baseline_benchmark_j1(self, benchmark):
        """Establish a baseline of pylint performance with no work.

        We will add extra Checkers in other benchmarks.
        Because this is so simple, if this regresses something very serious
        has happened."""
        linter = PyLinter(reporter=Reporter())
        fileinfos = [self.empty_filepath]  # Single file to end-to-end the system
        assert linter.config.jobs == 1
        assert len(linter._checkers) == 1, "Should just have 'master'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_benchmark_j10(self, benchmark):
        """Establish a baseline of pylint performance with no work across threads.

        Same as `test_baseline_benchmark_j1` but we use -j10 with 10 fake files
        to ensure the end-to-end system is invoked.
        Because this is also so simple, if this regresses something very
        serious has happened."""
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10

        # Create one file per worker, using all workers
        fileinfos = [self.empty_filepath for _ in range(linter.config.jobs)]

        assert linter.config.jobs == 10
        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_benchmark_check_parallel_j10(self, benchmark):
        """Should demonstrate times very close to `test_baseline_benchmark_j10`.

        Calls check_parallel directly instead of going through PyLinter.check,
        isolating the parallel-dispatch machinery itself."""
        linter = PyLinter(reporter=Reporter())

        # Create one file per worker, using all workers
        fileinfos = [self.empty_file_info for _ in range(linter.config.jobs)]

        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(check_parallel, linter, jobs=10, files=fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_lots_of_files_j1(self, benchmark):
        """Establish a baseline with only the 'master' checker being run in -j1.

        We do not register any checkers except the default 'master', so the
        cost is just that of the system with a lot of files registered."""
        if benchmark.disabled:
            benchmark(print, "skipping, only benchmark large file counts")
            return  # _only_ run this test when profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 1
        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
        assert linter.config.jobs == 1
        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_lots_of_files_j10(self, benchmark):
        """Establish a baseline with only the 'master' checker being run in -j10.

        As with the -j1 variant above, `test_baseline_lots_of_files_j1`, we do
        not register any checkers except the default 'master', so the cost is
        just that of the check_parallel system across 10 workers, plus the
        overhead of PyLinter."""
        if benchmark.disabled:
            benchmark(print, "skipping, only benchmark large file counts")
            return  # _only_ run this test when profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10
        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
        assert linter.config.jobs == 10
        assert len(linter._checkers) == 1, "Should have 'master'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_lots_of_files_j1_empty_checker(self, benchmark):
        """Baselines pylint for a single extra checker being run in -j1, for N files.

        We use a checker that does no work, so the cost is just that of the
        system at scale."""
        if benchmark.disabled:
            benchmark(print, "skipping, only benchmark large file counts")
            return  # _only_ run this test when profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 1
        linter.register_checker(NoWorkChecker(linter))
        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
        assert linter.config.jobs == 1
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_lots_of_files_j10_empty_checker(self, benchmark):
        """Baselines pylint for a single extra checker being run in -j10, for N files.

        We use a checker that does no work, so the cost is just that of the
        system at scale, across workers."""
        if benchmark.disabled:
            benchmark(print, "skipping, only benchmark large file counts")
            return  # _only_ run this test when profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10
        linter.register_checker(NoWorkChecker(linter))
        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]
        assert linter.config.jobs == 10
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_benchmark_j1_single_working_checker(self, benchmark):
        """Establish a baseline of single-worker performance for PyLinter.

        Here we mimic a single Checker that does some work so that we can see
        the impact of running a simple system with -j1 against the same system
        with -j10. We expect this benchmark to take very close to
        `numfiles * SleepingChecker.sleep_duration`."""
        if benchmark.disabled:
            benchmark(print, "skipping, do not want to sleep in main tests")
            return  # _only_ run this test when profiling
        linter = PyLinter(reporter=Reporter())
        linter.register_checker(SleepingChecker(linter))

        # Check the same number of files as
        # `test_baseline_benchmark_j10_single_working_checker`
        fileinfos = [self.empty_filepath for _ in range(10)]

        assert linter.config.jobs == 1
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_benchmark_j10_single_working_checker(self, benchmark):
        """Establishes baseline of multi-worker performance for PyLinter/check_parallel.

        We expect this benchmark to take less time than
        test_baseline_benchmark_j1, roughly
        `error_margin*(1/J)*(numfiles*SleepingChecker.sleep_duration)`.
        Because of the cost of the framework and system the performance
        difference will *not* be 1/10 of the -j1 version."""
        if benchmark.disabled:
            benchmark(print, "skipping, do not want to sleep in main tests")
            return  # _only_ run this test when profiling
        linter = PyLinter(reporter=Reporter())
        linter.config.jobs = 10
        linter.register_checker(SleepingChecker(linter))

        # Check the same number of files as
        # `test_baseline_benchmark_j1_single_working_checker`
        fileinfos = [self.empty_filepath for _ in range(10)]

        assert linter.config.jobs == 10
        assert len(linter._checkers) == 2, "Should have 'master' and 'sleeper'"
        benchmark(linter.check, fileinfos)
        assert linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(linter.reporter.messages)
        )

    def test_baseline_benchmark_j1_all_checks_single_file(self, benchmark):
        """Runs a single file, with -j1, against all plug-ins.

        ... that's the intent at least."""
        # Just 1 file, but all Checkers/Extensions
        fileinfos = [self.empty_filepath]

        # Run (unlike PyLinter) registers and enables the full default
        # checker/extension set, so the whole benchmark goes through Run.
        runner = benchmark(Run, fileinfos, reporter=Reporter(), do_exit=False)
        assert runner.linter.config.jobs == 1
        print("len(runner.linter._checkers)", len(runner.linter._checkers))
        assert len(runner.linter._checkers) > 1, "Should have more than 'master'"

        assert runner.linter.msg_status == 0, (
            "Expected no errors to be thrown: %s"
            % pprint.pformat(runner.linter.reporter.messages)
        )

    def test_baseline_benchmark_j1_all_checks_lots_of_files(self, benchmark):
        """Runs lots of files, with -j1, against all plug-ins.

        ... that's the intent at least."""
        if benchmark.disabled:
            benchmark(print, "skipping, only benchmark large file counts")
            return  # _only_ run this test when profiling
        linter = PyLinter()

        # Register all checkers/extensions and enable them
        register_plugins(
            linter, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
        )
        linter.load_default_plugins()
        linter.enable("all")

        # Many files, against all Checkers/Extensions
        fileinfos = [self.empty_filepath for _ in range(self.lot_of_files)]

        assert linter.config.jobs == 1
        print("len(linter._checkers)", len(linter._checkers))
        assert len(linter._checkers) > 1, "Should have more than 'master'"
        # NOTE(review): unlike the sibling benchmarks, no msg_status assertion
        # follows the benchmark here — presumably intentional; confirm.
        benchmark(linter.check, fileinfos)
Empty file.