Skip to content

Commit ae374af

Browse files
authored
Fix in-memory cache reuse (#4199)
Fixes #4185. Fixes #4198. Fixes #4195. Fixes #4208. Also some improvements to mypy/incremental_checker.py and report more stats about in-memory cache.
1 parent b240125 commit ae374af

File tree

3 files changed

+168
-26
lines changed

3 files changed

+168
-26
lines changed

misc/incremental_checker.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def run_mypy(target_file_path: Optional[str],
157157

158158

159159
def start_daemon(mypy_cache_path: str) -> None:
160-
cmd = DAEMON_CMD + ["restart", "--", "--cache-dir", mypy_cache_path]
160+
cmd = DAEMON_CMD + ["restart", "--log-file", "./@incr-chk-logs", "--", "--cache-dir", mypy_cache_path]
161161
execute(cmd)
162162

163163

@@ -287,6 +287,8 @@ def test_repo(target_repo_url: str, temp_repo_path: str,
287287
else:
288288
raise RuntimeError("Invalid option: {}".format(range_type))
289289
commits = get_commits_starting_at(temp_repo_path, start_commit)
290+
if params.limit:
291+
commits = commits[:params.limit]
290292
if params.sample:
291293
seed = params.seed or base64.urlsafe_b64encode(os.urandom(15)).decode('ascii')
292294
random.seed(seed)
@@ -308,7 +310,8 @@ def test_repo(target_repo_url: str, temp_repo_path: str,
308310
exit_on_error=params.exit_on_error)
309311

310312
# Stage 5: Remove temp files, stop daemon
311-
cleanup(temp_repo_path, mypy_cache_path)
313+
if not params.keep_temporary_files:
314+
cleanup(temp_repo_path, mypy_cache_path)
312315
if params.daemon:
313316
print('Stopping daemon')
314317
stop_daemon()
@@ -332,13 +335,17 @@ def main() -> None:
332335
help="the name of the file or directory to typecheck")
333336
parser.add_argument("-x", "--exit-on-error", action='store_true',
334337
help="Exits as soon as an error occurs")
338+
parser.add_argument("--keep-temporary-files", action='store_true',
339+
help="Keep temporary files on exit")
335340
parser.add_argument("--cache-path", default=CACHE_PATH, metavar="DIR",
336341
help="sets a custom location to store cache data")
337342
parser.add_argument("--branch", default=None, metavar="NAME",
338343
help="check out and test a custom branch"
339344
"uses the default if not specified")
340345
parser.add_argument("--sample", type=int, help="use a random sample of size SAMPLE")
341346
parser.add_argument("--seed", type=str, help="random seed")
347+
parser.add_argument("--limit", type=int,
348+
help="maximum number of commits to use (default until end)")
342349
parser.add_argument("--mypy-script", type=str, help="alternate mypy script to run")
343350
parser.add_argument("--daemon", action='store_true',
344351
help="use mypy daemon instead of incremental (highly experimental)")

mypy/build.py

Lines changed: 97 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
if MYPY:
3333
from typing import Deque
3434

35-
from mypy.nodes import (MypyFile, Node, ImportBase, Import, ImportFrom, ImportAll)
35+
from mypy.nodes import (MODULE_REF, MypyFile, Node, ImportBase, Import, ImportFrom, ImportAll)
3636
from mypy.semanal_pass1 import SemanticAnalyzerPass1
3737
from mypy.semanal import SemanticAnalyzerPass2
3838
from mypy.semanal_pass3 import SemanticAnalyzerPass3
@@ -179,6 +179,8 @@ def build(sources: List[BuildSource],
179179
# multiple builds, there could be a mix of files/modules, so its easier
180180
# to just define the semantics that we always add the current director
181181
# to the lib_path
182+
# TODO: Don't do this in some cases; for motivation see see
183+
# https://github.com/python/mypy/issues/4195#issuecomment-341915031
182184
lib_path.insert(0, os.getcwd())
183185

184186
# Prepend a config-defined mypy path.
@@ -1617,7 +1619,6 @@ def mark_as_rechecked(self) -> None:
16171619

16181620
def mark_interface_stale(self, *, on_errors: bool = False) -> None:
16191621
"""Marks this module as having a stale public interface, and discards the cache data."""
1620-
self.meta = None
16211622
self.externally_same = False
16221623
if not on_errors:
16231624
self.manager.stale_modules.add(self.id)
@@ -1899,6 +1900,7 @@ def write_cache(self) -> None:
18991900
is_errors = self.manager.errors.is_errors()
19001901
if is_errors:
19011902
delete_cache(self.id, self.path, self.manager)
1903+
self.meta = None
19021904
self.mark_interface_stale(on_errors=True)
19031905
return
19041906
dep_prios = [self.priorities.get(dep, PRI_HIGH) for dep in self.dependencies]
@@ -1916,6 +1918,7 @@ def write_cache(self) -> None:
19161918

19171919

19181920
def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
1921+
set_orig = set(manager.saved_cache)
19191922
manager.log()
19201923
manager.log("Mypy version %s" % __version__)
19211924
t0 = time.time()
@@ -1936,7 +1939,17 @@ def dispatch(sources: List[BuildSource], manager: BuildManager) -> Graph:
19361939
if manager.options.warn_unused_ignores:
19371940
# TODO: This could also be a per-module option.
19381941
manager.errors.generate_unused_ignore_notes()
1939-
manager.saved_cache.update(preserve_cache(graph))
1942+
updated = preserve_cache(graph)
1943+
set_updated = set(updated)
1944+
manager.saved_cache.clear()
1945+
manager.saved_cache.update(updated)
1946+
set_final = set(manager.saved_cache)
1947+
# These keys have numbers in them to force a sort order.
1948+
manager.add_stats(saved_cache_1orig=len(set_orig),
1949+
saved_cache_2updated=len(set_updated & set_orig),
1950+
saved_cache_3added=len(set_final - set_orig),
1951+
saved_cache_4removed=len(set_orig - set_final),
1952+
saved_cache_5final=len(set_final))
19401953
if manager.options.dump_deps:
19411954
# This speeds up startup a little when not using the daemon mode.
19421955
from mypy.server.deps import dump_all_dependencies
@@ -2035,7 +2048,19 @@ def load_graph(sources: List[BuildSource], manager: BuildManager) -> Graph:
20352048
while new:
20362049
st = new.popleft()
20372050
assert st.ancestors is not None
2038-
for dep in st.ancestors + st.dependencies + st.suppressed:
2051+
# Strip out indirect dependencies. These will be dealt with
2052+
# when they show up as direct dependencies, and there's a
2053+
# scenario where they hurt:
2054+
# - Suppose A imports B and B imports C.
2055+
# - Suppose on the next round:
2056+
# - C is deleted;
2057+
# - B is updated to remove the dependency on C;
2058+
# - A is unchanged.
2059+
# - In this case A's cached *direct* dependencies are still valid
2060+
# (since direct dependencies reflect the imports found in the source)
2061+
# but A's cached *indirect* dependency on C is wrong.
2062+
dependencies = [dep for dep in st.dependencies if st.priorities.get(dep) != PRI_INDIRECT]
2063+
for dep in st.ancestors + dependencies + st.suppressed:
20392064
# We don't want to recheck imports marked with '# type: ignore'
20402065
# so we ignore any suppressed module not explicitly re-included
20412066
# from the command line.
@@ -2117,7 +2142,7 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
21172142
for id in scc:
21182143
deps.update(graph[id].dependencies)
21192144
deps -= ascc
2120-
stale_deps = {id for id in deps if not graph[id].is_interface_fresh()}
2145+
stale_deps = {id for id in deps if id in graph and not graph[id].is_interface_fresh()}
21212146
if not manager.options.quick_and_dirty:
21222147
fresh = fresh and not stale_deps
21232148
undeps = set()
@@ -2174,8 +2199,9 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
21742199

21752200
scc_str = " ".join(scc)
21762201
if fresh:
2177-
manager.trace("Queuing %s SCC (%s)" % (fresh_msg, scc_str))
2178-
fresh_scc_queue.append(scc)
2202+
if not maybe_reuse_in_memory_tree(graph, scc, manager):
2203+
manager.trace("Queuing %s SCC (%s)" % (fresh_msg, scc_str))
2204+
fresh_scc_queue.append(scc)
21792205
else:
21802206
if len(fresh_scc_queue) > 0:
21812207
manager.log("Processing {} queued fresh SCCs".format(len(fresh_scc_queue)))
@@ -2263,23 +2289,6 @@ def process_fresh_scc(graph: Graph, scc: List[str], manager: BuildManager) -> No
22632289
22642290
If the tree is loaded from memory ('saved_cache') it's even quicker.
22652291
"""
2266-
saved_cache = manager.saved_cache
2267-
# Check that all nodes are available for loading from memory.
2268-
if all(id in saved_cache for id in scc):
2269-
deps = set(dep for id in scc for dep in graph[id].dependencies if dep in graph)
2270-
# Check that all dependencies were loaded from memory.
2271-
# If not, some dependency was reparsed but the interface hash
2272-
# wasn't changed -- in that case we can't reuse the tree.
2273-
if all(graph[dep].is_from_saved_cache for dep in deps):
2274-
trees = {id: saved_cache[id][1] for id in scc}
2275-
for id, tree in trees.items():
2276-
manager.add_stats(reused_trees=1)
2277-
manager.trace("Reusing saved tree %s" % id)
2278-
st = graph[id]
2279-
st.tree = tree # This is never overwritten.
2280-
st.is_from_saved_cache = True
2281-
manager.modules[id] = tree
2282-
return
22832292
for id in scc:
22842293
graph[id].load_tree()
22852294
for id in scc:
@@ -2290,6 +2299,70 @@ def process_fresh_scc(graph: Graph, scc: List[str], manager: BuildManager) -> No
22902299
graph[id].patch_dependency_parents()
22912300

22922301

2302+
def maybe_reuse_in_memory_tree(graph: Graph, scc: List[str], manager: BuildManager) -> bool:
2303+
"""Set the trees for the given SCC from the in-memory cache, if all valid.
2304+
2305+
If any saved tree for this SCC is invalid, set the trees for all
2306+
SCC members to None and mark as not-from-cache.
2307+
"""
2308+
if not can_reuse_in_memory_tree(graph, scc, manager):
2309+
for id in scc:
2310+
manager.add_stats(cleared_trees=1)
2311+
manager.trace("Clearing tree %s" % id)
2312+
st = graph[id]
2313+
st.tree = None
2314+
st.is_from_saved_cache = False
2315+
if id in manager.modules:
2316+
del manager.modules[id]
2317+
return False
2318+
trees = {id: manager.saved_cache[id][1] for id in scc}
2319+
for id, tree in trees.items():
2320+
manager.add_stats(reused_trees=1)
2321+
manager.trace("Reusing saved tree %s" % id)
2322+
st = graph[id]
2323+
st.tree = tree
2324+
st.is_from_saved_cache = True
2325+
manager.modules[id] = tree
2326+
# Delete any submodules from the module that aren't
2327+
# dependencies of the module; they will be re-added once
2328+
# imported. It's possible that the parent module is reused
2329+
# but a submodule isn't; we don't want to accidentally link
2330+
# into the old submodule's tree. See also
2331+
# patch_dependency_parents() above. The exception for subname
2332+
# in st.dependencies handles the case where 'import m'
2333+
# guarantees that some submodule of m is also available
2334+
# (e.g. 'os.path'); in those cases the submodule is an
2335+
# explicit dependency of the parent.
2336+
for name in list(tree.names):
2337+
sym = tree.names[name]
2338+
subname = id + '.' + name
2339+
if (sym.kind == MODULE_REF
2340+
and sym.node is not None
2341+
and sym.node.fullname() == subname
2342+
and subname not in st.dependencies):
2343+
manager.trace("Purging %s" % subname)
2344+
del tree.names[name]
2345+
return True
2346+
2347+
2348+
def can_reuse_in_memory_tree(graph: Graph, scc: List[str], manager: BuildManager) -> bool:
2349+
"""Check whether the given SCC can safely reuse the trees from saved_cache.
2350+
2351+
Assumes the SCC is already considered fresh.
2352+
"""
2353+
saved_cache = manager.saved_cache
2354+
# Check that all nodes are available for loading from memory.
2355+
if all(id in saved_cache for id in scc):
2356+
# Check that all dependencies were loaded from memory.
2357+
# If not, some dependency was reparsed but the interface hash
2358+
# wasn't changed -- in that case we can't reuse the tree.
2359+
deps = set(dep for id in scc for dep in graph[id].dependencies if dep in graph)
2360+
deps -= set(scc) # Subtract the SCC itself (else nothing will be safe)
2361+
if all(graph[dep].is_from_saved_cache for dep in deps):
2362+
return True
2363+
return False
2364+
2365+
22932366
def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
22942367
"""Process the modules in one SCC from source code.
22952368

test-data/unit/check-incremental.test

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3376,3 +3376,65 @@ def f() -> None:
33763376
[out2]
33773377
[out3]
33783378
[out4]
3379+
3380+
[case testTreeShadowingViaParentPackage]
3381+
import m.semanal
3382+
3383+
[file m/__init__.py]
3384+
pass
3385+
3386+
[file m/nodes.py]
3387+
if False:
3388+
import m.types
3389+
import m.semanal
3390+
class Node:
3391+
line: int
3392+
class FuncBase(Node):
3393+
type: m.types.Type
3394+
class OverloadedFuncDef(FuncBase): pass
3395+
3396+
[file m/types.py]
3397+
from m.nodes import Node
3398+
class Type(Node): pass
3399+
class Overloaded(Type): pass
3400+
3401+
[file m/semanal.py]
3402+
from m.nodes import OverloadedFuncDef
3403+
from m.types import Overloaded
3404+
3405+
class C:
3406+
def func(self, defn: OverloadedFuncDef):
3407+
defn.type = Overloaded()
3408+
defn.type.line = 0
3409+
3410+
[file m/nodes.py.2]
3411+
if False:
3412+
import m.types
3413+
import m.semanal
3414+
class Node:
3415+
line: int
3416+
class FuncBase(Node):
3417+
type: m.types.Type
3418+
class OverloadedFuncDef(FuncBase): pass
3419+
extra = 1
3420+
3421+
[file m/types.py.2]
3422+
from m.nodes import Node
3423+
class Type(Node): pass
3424+
class Overloaded(Type): pass
3425+
extra = 1
3426+
[builtins fixtures/list.pyi]
3427+
3428+
[file m/semanal.py.2]
3429+
from m.nodes import OverloadedFuncDef
3430+
from m.types import Overloaded
3431+
3432+
class C:
3433+
def func(self, defn: OverloadedFuncDef):
3434+
defn.type = Overloaded()
3435+
defn.type.line = 0
3436+
3437+
extra = 1
3438+
3439+
[out1]
3440+
[out2]

0 commit comments

Comments
 (0)