From 90fdc7770a26c389832c84631dcd05e154f0e3fa Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 23 Nov 2022 12:13:33 +0000 Subject: [PATCH 1/2] Add intenal flag for per-line type checking peformance --- mypy/build.py | 30 ++++++++++++++++++++++++------ mypy/checker.py | 5 ++++- mypy/checkexpr.py | 28 +++++++++++++++++++++++++--- mypy/main.py | 8 +++++++- mypy/options.py | 1 + mypy/util.py | 8 +++----- 6 files changed, 64 insertions(+), 16 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ba54c81845e0..b32276dd3020 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -12,6 +12,7 @@ from __future__ import annotations +import collections import contextlib import errno import gc @@ -278,6 +279,8 @@ def _build( TypeState.reset_all_subtype_caches() if options.timing_stats is not None: dump_timing_stats(options.timing_stats, graph) + if options.line_checking_stats is not None: + dump_line_checking_stats(options.line_checking_stats, graph) return BuildResult(manager, graph) finally: t0 = time.time() @@ -1889,6 +1892,10 @@ class State: # Cumulative time spent on this file, in microseconds (for profiling stats) time_spent_us: int = 0 + # Per-line type-checking time (cumulative time spent type-checking expressions + # on a given source code line). + per_line_checking_time_ns: dict[int, int] + def __init__( self, id: str | None, @@ -1956,6 +1963,7 @@ def __init__( source = "" self.source = source self.add_ancestors() + self.per_line_checking_time_ns = collections.defaultdict(int) t0 = time.time() self.meta = validate_meta(self.meta, self.id, self.path, self.ignore_all, manager) self.manager.add_stats(validate_meta_time=time.time() - t0) @@ -2320,6 +2328,7 @@ def type_checker(self) -> TypeChecker: self.tree, self.xpath, manager.plugin, + self.per_line_checking_time_ns, ) return self._type_checker @@ -2945,13 +2954,22 @@ def dumps(self) -> str: def dump_timing_stats(path: str, graph: Graph) -> None: - """ - Dump timing stats for each file in the given graph - """ + """Dump timing stats for each file in the given graph.""" with open(path, "w") as f: - for k in sorted(graph.keys()): - v = graph[k] - f.write(f"{v.id} {v.time_spent_us}\n") + for id in sorted(graph): + f.write(f"{id} {graph[id].time_spent_us}\n") + + +def dump_line_checking_stats(path: str, graph: Graph) -> None: + """Dump per-line expression type checking stats.""" + with open(path, "w") as f: + for id in sorted(graph): + if not graph[id].per_line_checking_time_ns: + continue + f.write(f"{id}:\n") + for line in sorted(graph[id].per_line_checking_time_ns): + line_time = graph[id].per_line_checking_time_ns[line] + f.write(f"{line:>5} {line_time/1000:8.1f}\n") def dump_graph(graph: Graph, stdout: TextIO | None = None) -> None: diff --git a/mypy/checker.py b/mypy/checker.py index b0fde94025b6..431fde299dc0 100644 --- a/mypy/checker.py +++ b/mypy/checker.py @@ -364,6 +364,7 @@ def __init__( tree: MypyFile, path: str, plugin: Plugin, + per_line_checking_time_ns: dict[int, int], ) -> None: """Construct a type checker. @@ -376,7 +377,9 @@ def __init__( self.path = path self.msg = MessageBuilder(errors, modules) self.plugin = plugin - self.expr_checker = mypy.checkexpr.ExpressionChecker(self, self.msg, self.plugin) + self.expr_checker = mypy.checkexpr.ExpressionChecker( + self, self.msg, self.plugin, per_line_checking_time_ns + ) self.pattern_checker = PatternChecker(self, self.msg, self.plugin) self.tscope = Scope() self.scope = CheckerScope(tree) diff --git a/mypy/checkexpr.py b/mypy/checkexpr.py index b41a38825fb3..a9dada941add 100644 --- a/mypy/checkexpr.py +++ b/mypy/checkexpr.py @@ -3,6 +3,7 @@ from __future__ import annotations import itertools +import time from contextlib import contextmanager from typing import Callable, ClassVar, Iterator, List, Optional, Sequence, cast from typing_extensions import Final, TypeAlias as _TypeAlias, overload @@ -263,11 +264,21 @@ class ExpressionChecker(ExpressionVisitor[Type]): strfrm_checker: StringFormatterChecker plugin: Plugin - def __init__(self, chk: mypy.checker.TypeChecker, msg: MessageBuilder, plugin: Plugin) -> None: + def __init__( + self, + chk: mypy.checker.TypeChecker, + msg: MessageBuilder, + plugin: Plugin, + per_line_checking_time_ns: dict[int, int], + ) -> None: """Construct an expression type checker.""" self.chk = chk self.msg = msg self.plugin = plugin + self.per_line_checking_time_ns = per_line_checking_time_ns + # Are we already visiting some expression? This is used to avoid double counting + # time for nested expressions. + self.in_expression = False self.type_context = [None] # Temporary overrides for expression types. This is currently @@ -4727,7 +4738,14 @@ def accept( applies only to this expression and not any subexpressions. """ if node in self.type_overrides: + # This branch is very fast, there is no point timing it. return self.type_overrides[node] + # We don't use context manager here to get most precise data (and avoid overhead). + record_time = False + if not self.in_expression and self.chk.options.line_checking_stats is not None: + t0 = time.perf_counter_ns() + self.in_expression = True + record_time = True self.type_context.append(type_context) old_is_callee = self.is_callee self.is_callee = is_callee @@ -4762,9 +4780,13 @@ def accept( self.msg.disallowed_any_type(typ, node) if not self.chk.in_checked_function() or self.chk.current_node_deferred: - return AnyType(TypeOfAny.unannotated) + result: Type = AnyType(TypeOfAny.unannotated) else: - return typ + result = typ + if record_time: + self.per_line_checking_time_ns[node.line] += time.perf_counter_ns() - t0 + self.in_expression = False + return result def named_type(self, name: str) -> Instance: """Return an instance type with type given by the name and no type diff --git a/mypy/main.py b/mypy/main.py index 405596c20991..d0cb6ca4d505 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -1082,8 +1082,14 @@ def add_invertible_flag( "--inferstats", action="store_true", dest="dump_inference_stats", help=argparse.SUPPRESS ) parser.add_argument("--dump-build-stats", action="store_true", help=argparse.SUPPRESS) - # dump timing stats for each processed file into the given output file + # Dump timing stats for each processed file into the given output file parser.add_argument("--timing-stats", dest="timing_stats", help=argparse.SUPPRESS) + # Dump per line type checking timing stats for each processed file into the given + # output file. Only total time spent in each top level expression will be shown. + # Times are show in microseconds. + parser.add_argument( + "--line-checking-stats", dest="line_checking_stats", help=argparse.SUPPRESS + ) # --debug-cache will disable any cache-related compressions/optimizations, # which will make the cache writing process output pretty-printed JSON (which # is easier to debug). diff --git a/mypy/options.py b/mypy/options.py index 3a08ff9455ee..ffb6b201e70b 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -283,6 +283,7 @@ def __init__(self) -> None: self.enable_incomplete_features = False # deprecated self.enable_incomplete_feature: list[str] = [] self.timing_stats: str | None = None + self.line_checking_stats: str | None = None # -- test options -- # Stop after the semantic analysis phase diff --git a/mypy/util.py b/mypy/util.py index 04ed616ade07..cced4db34fc9 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -807,13 +807,11 @@ def unnamed_function(name: str | None) -> bool: return name is not None and name == "_" -# TODO: replace with uses of perf_counter_ns when support for py3.6 is dropped -# (or when mypy properly handles alternate definitions based on python version check -time_ref = time.perf_counter +time_ref = time.perf_counter_ns -def time_spent_us(t0: float) -> int: - return int((time.perf_counter() - t0) * 1e6) +def time_spent_us(t0: int) -> int: + return int((time.perf_counter_ns() - t0) / 1000) def plural_s(s: int | Sized) -> str: From cc9811340b23a9b0d65361da1e14730bfa622362 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Wed, 23 Nov 2022 12:52:35 +0000 Subject: [PATCH 2/2] Perf tweak --- mypy/checkexpr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mypy/checkexpr.py b/mypy/checkexpr.py index a9dada941add..78ae412072f5 100644 --- a/mypy/checkexpr.py +++ b/mypy/checkexpr.py @@ -276,6 +276,7 @@ def __init__( self.msg = msg self.plugin = plugin self.per_line_checking_time_ns = per_line_checking_time_ns + self.collect_line_checking_stats = self.chk.options.line_checking_stats is not None # Are we already visiting some expression? This is used to avoid double counting # time for nested expressions. self.in_expression = False @@ -4742,7 +4743,7 @@ def accept( return self.type_overrides[node] # We don't use context manager here to get most precise data (and avoid overhead). record_time = False - if not self.in_expression and self.chk.options.line_checking_stats is not None: + if self.collect_line_checking_stats and not self.in_expression: t0 = time.perf_counter_ns() self.in_expression = True record_time = True