Skip to content

Commit d48d548

Browse files
authored
introduce per-file timing-stats (#12639)
When profiling mypy over a large codebase, it can be useful to know which files are slowest to typecheck. Gather per-file timing stats and expose them through a new (hidden) command-line switch.
1 parent d64efcd commit d48d548

File tree

7 files changed

+75
-8
lines changed

7 files changed

+75
-8
lines changed

mypy/build.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
from mypy.errors import Errors, CompileError, ErrorInfo, report_internal_error
3737
from mypy.util import (
3838
DecodeError, decode_python_encoding, is_sub_path, get_mypy_comments, module_prefix,
39-
read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes
39+
read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes,
40+
time_ref, time_spent_us
4041
)
4142
if TYPE_CHECKING:
4243
from mypy.report import Reports # Avoid unconditional slow import
@@ -256,6 +257,8 @@ def _build(sources: List[BuildSource],
256257
graph = dispatch(sources, manager, stdout)
257258
if not options.fine_grained_incremental:
258259
TypeState.reset_all_subtype_caches()
260+
if options.timing_stats is not None:
261+
dump_timing_stats(options.timing_stats, graph)
259262
return BuildResult(manager, graph)
260263
finally:
261264
t0 = time.time()
@@ -1808,6 +1811,9 @@ class State:
18081811

18091812
fine_grained_deps_loaded = False
18101813

1814+
# Cumulative time spent on this file, in microseconds (for profiling stats)
1815+
time_spent_us: int = 0
1816+
18111817
def __init__(self,
18121818
id: Optional[str],
18131819
path: Optional[str],
@@ -2034,6 +2040,8 @@ def parse_file(self) -> None:
20342040
else:
20352041
manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))
20362042

2043+
t0 = time_ref()
2044+
20372045
with self.wrap_context():
20382046
source = self.source
20392047
self.source = None # We won't need it again.
@@ -2079,6 +2087,8 @@ def parse_file(self) -> None:
20792087
self.tree.ignored_lines,
20802088
self.ignore_all or self.options.ignore_errors)
20812089

2090+
self.time_spent_us += time_spent_us(t0)
2091+
20822092
if not cached:
20832093
# Make a copy of any errors produced during parse time so that
20842094
# fine-grained mode can repeat them when the module is
@@ -2113,6 +2123,9 @@ def semantic_analysis_pass1(self) -> None:
21132123
"""
21142124
options = self.options
21152125
assert self.tree is not None
2126+
2127+
t0 = time_ref()
2128+
21162129
# Do the first pass of semantic analysis: analyze the reachability
21172130
# of blocks and import statements. We must do this before
21182131
# processing imports, since this may mark some import statements as
@@ -2131,6 +2144,7 @@ def semantic_analysis_pass1(self) -> None:
21312144
if options.allow_redefinition:
21322145
# Perform more renaming across the AST to allow variable redefinitions
21332146
self.tree.accept(VariableRenameVisitor())
2147+
self.time_spent_us += time_spent_us(t0)
21342148

21352149
def add_dependency(self, dep: str) -> None:
21362150
if dep not in self.dependencies_set:
@@ -2188,8 +2202,10 @@ def compute_dependencies(self) -> None:
21882202
def type_check_first_pass(self) -> None:
21892203
if self.options.semantic_analysis_only:
21902204
return
2205+
t0 = time_ref()
21912206
with self.wrap_context():
21922207
self.type_checker().check_first_pass()
2208+
self.time_spent_us += time_spent_us(t0)
21932209

21942210
def type_checker(self) -> TypeChecker:
21952211
if not self._type_checker:
@@ -2207,14 +2223,17 @@ def type_map(self) -> Dict[Expression, Type]:
22072223
def type_check_second_pass(self) -> bool:
22082224
if self.options.semantic_analysis_only:
22092225
return False
2226+
t0 = time_ref()
22102227
with self.wrap_context():
22112228
return self.type_checker().check_second_pass()
2229+
self.time_spent_us += time_spent_us(t0)
22122230

22132231
def finish_passes(self) -> None:
22142232
assert self.tree is not None, "Internal error: method must be called on parsed file only"
22152233
manager = self.manager
22162234
if self.options.semantic_analysis_only:
22172235
return
2236+
t0 = time_ref()
22182237
with self.wrap_context():
22192238
# Some tests (and tools) want to look at the set of all types.
22202239
options = manager.options
@@ -2237,6 +2256,7 @@ def finish_passes(self) -> None:
22372256
self.free_state()
22382257
if not manager.options.fine_grained_incremental and not manager.options.preserve_asts:
22392258
free_tree(self.tree)
2259+
self.time_spent_us += time_spent_us(t0)
22402260

22412261
def free_state(self) -> None:
22422262
if self._type_checker:
@@ -2771,6 +2791,16 @@ def dumps(self) -> str:
27712791
json.dumps(self.deps))
27722792

27732793

2794+
def dump_timing_stats(path: str, graph: Graph) -> None:
2795+
"""
2796+
Dump timing stats for each file in the given graph
2797+
"""
2798+
with open(path, 'w') as f:
2799+
for k in sorted(graph.keys()):
2800+
v = graph[k]
2801+
f.write('{} {}\n'.format(v.id, v.time_spent_us))
2802+
2803+
27742804
def dump_graph(graph: Graph, stdout: Optional[TextIO] = None) -> None:
27752805
"""Dump the graph as a JSON string to stdout.
27762806

mypy/main.py

+3
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,9 @@ def add_invertible_flag(flag: str,
835835
parser.add_argument(
836836
'--dump-build-stats', action='store_true',
837837
help=argparse.SUPPRESS)
838+
# dump timing stats for each processed file into the given output file
839+
parser.add_argument(
840+
'--timing-stats', dest='timing_stats', help=argparse.SUPPRESS)
838841
# --debug-cache will disable any cache-related compressions/optimizations,
839842
# which will make the cache writing process output pretty-printed JSON (which
840843
# is easier to debug).

mypy/options.py

+1
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ def __init__(self) -> None:
263263
self.dump_inference_stats = False
264264
self.dump_build_stats = False
265265
self.enable_incomplete_features = False
266+
self.timing_stats: Optional[str] = None
266267

267268
# -- test options --
268269
# Stop after the semantic analysis phase

mypy/test/data.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import sys
1111

1212
import pytest
13-
from typing import List, Tuple, Set, Optional, Iterator, Any, Dict, NamedTuple, Union
13+
from typing import List, Tuple, Set, Optional, Iterator, Any, Dict, NamedTuple, Union, Pattern
1414

1515
from mypy.test.config import test_data_prefix, test_temp_dir, PREFIX
1616

@@ -44,7 +44,7 @@ def parse_test_case(case: 'DataDrivenTestCase') -> None:
4444
normalize_output = True
4545

4646
files: List[Tuple[str, str]] = [] # path and contents
47-
output_files: List[Tuple[str, str]] = [] # path and contents for output files
47+
output_files: List[Tuple[str, Union[str, Pattern[str]]]] = [] # output path and contents
4848
output: List[str] = [] # Regular output errors
4949
output2: Dict[int, List[str]] = {} # Output errors for incremental, runs 2+
5050
deleted_paths: Dict[int, Set[str]] = {} # from run number of paths
@@ -57,13 +57,15 @@ def parse_test_case(case: 'DataDrivenTestCase') -> None:
5757
# optionally followed by lines of text.
5858
item = first_item = test_items[0]
5959
for item in test_items[1:]:
60-
if item.id == 'file' or item.id == 'outfile':
60+
if item.id in {'file', 'outfile', 'outfile-re'}:
6161
# Record an extra file needed for the test case.
6262
assert item.arg is not None
6363
contents = expand_variables('\n'.join(item.data))
6464
file_entry = (join(base_path, item.arg), contents)
6565
if item.id == 'file':
6666
files.append(file_entry)
67+
elif item.id == 'outfile-re':
68+
output_files.append((file_entry[0], re.compile(file_entry[1].rstrip(), re.S)))
6769
else:
6870
output_files.append(file_entry)
6971
elif item.id in ('builtins', 'builtins_py2'):
@@ -220,7 +222,7 @@ class DataDrivenTestCase(pytest.Item):
220222

221223
# Extra attributes used by some tests.
222224
last_line: int
223-
output_files: List[Tuple[str, str]] # Path and contents for output files
225+
output_files: List[Tuple[str, Union[str, Pattern[str]]]] # Path and contents for output files
224226
deleted_paths: Dict[int, Set[str]] # Mapping run number -> paths
225227
triggered: List[str] # Active triggers (one line per incremental step)
226228

mypy/test/helpers.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import shutil
66
import contextlib
77

8-
from typing import List, Iterable, Dict, Tuple, Callable, Any, Iterator, Union
8+
from typing import List, Iterable, Dict, Tuple, Callable, Any, Iterator, Union, Pattern
99

1010
from mypy import defaults
1111
import mypy.api as api
@@ -458,8 +458,17 @@ def check_test_output_files(testcase: DataDrivenTestCase,
458458
'Expected file {} was not produced by test case{}'.format(
459459
path, ' on step %d' % step if testcase.output2 else ''))
460460
with open(path, 'r', encoding='utf8') as output_file:
461-
actual_output_content = output_file.read().splitlines()
462-
normalized_output = normalize_file_output(actual_output_content,
461+
actual_output_content = output_file.read()
462+
463+
if isinstance(expected_content, Pattern):
464+
if expected_content.fullmatch(actual_output_content) is not None:
465+
continue
466+
raise AssertionError(
467+
'Output file {} did not match its expected output pattern\n---\n{}\n---'.format(
468+
path, actual_output_content)
469+
)
470+
471+
normalized_output = normalize_file_output(actual_output_content.splitlines(),
463472
os.path.abspath(test_temp_dir))
464473
# We always normalize things like timestamp, but only handle operating-system
465474
# specific things if requested.

mypy/util.py

+10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import hashlib
99
import io
1010
import shutil
11+
import time
1112

1213
from typing import (
1314
TypeVar, List, Tuple, Optional, Dict, Sequence, Iterable, Container, IO, Callable
@@ -763,3 +764,12 @@ def is_stub_package_file(file: str) -> bool:
763764

764765
def unnamed_function(name: Optional[str]) -> bool:
765766
return name is not None and name == "_"
767+
768+
769+
# TODO: replace with uses of perf_counter_ns when support for py3.6 is dropped
770+
(or when mypy properly handles alternate definitions based on a Python version check)
771+
time_ref = time.perf_counter
772+
773+
774+
def time_spent_us(t0: float) -> int:
775+
return int((time.perf_counter() - t0) * 1e6)

test-data/unit/cmdline.test

+12
Original file line numberDiff line numberDiff line change
@@ -1373,3 +1373,15 @@ exclude = (?x)(
13731373
1()
13741374
[out]
13751375
c/cpkg.py:1: error: "int" not callable
1376+
1377+
1378+
[case testCmdlineTimingStats]
1379+
# cmd: mypy --timing-stats timing.txt .
1380+
[file b/__init__.py]
1381+
[file b/c.py]
1382+
class C: pass
1383+
[outfile-re timing.txt]
1384+
.*
1385+
b \d+
1386+
b\.c \d+
1387+
.*

0 commit comments

Comments
 (0)