Skip to content

Commit 652f5ab

Browse files
authored
remote: use separate working tree instance for local cache (#3991)
* dvcignore: ignore paths outside of CleanTree root * remote: local remote/cache should always use its own working tree * repo tree (CleanTree) instance should not be relevant to local cache operations. When the dvcignore CleanTree is relevant, repo.tree should be used. For local remote/cache operations, the local remote work_tree should be used. * state: tie state to local cache working tree * tests: update tests * state: use os.path.exists instead of WorkingTree.exists in some cases
1 parent 28a123e commit 652f5ab

File tree

6 files changed

+24
-41
lines changed

6 files changed

+24
-41
lines changed

dvc/ignore.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,16 +186,15 @@ def _parents_exist(self, path):
186186
if path.parent == self.tree_root or Repo.DVC_DIR in path.parts:
187187
return True
188188

189-
# if path is outside of tree, assume this is a local remote/local cache
190-
# link/move operation where we do not need to filter ignores
189+
# paths outside of the CleanTree root should be ignored
191190
path = relpath(path, self.tree_root)
192191
if path.startswith("..") or (
193192
os.name == "nt"
194193
and not os.path.commonprefix(
195194
[os.path.abspath(path), self.tree_root]
196195
)
197196
):
198-
return True
197+
return False
199198

200199
# check if parent directories are in our ignores, starting from
201200
# tree_root

dvc/remote/local.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,12 @@ def repo(self):
5050
return self.remote.repo
5151

5252
@cached_property
53-
def _work_tree(self):
54-
if self.repo:
55-
return WorkingTree(self.repo.root_dir)
56-
return None
57-
58-
@property
5953
def work_tree(self):
6054
# When using repo.brancher, repo.tree may change to/from WorkingTree to
6155
# GitTree arbitarily. When repo.tree is GitTree, local cache needs to
6256
# use its own WorkingTree instance.
63-
if self.repo and not is_working_tree(self.repo.tree):
64-
return self._work_tree
57+
if self.repo:
58+
return WorkingTree(self.repo.root_dir)
6559
return None
6660

6761
@staticmethod
@@ -72,35 +66,25 @@ def exists(self, path_info):
7266
assert isinstance(path_info, str) or path_info.scheme == "local"
7367
if not self.repo:
7468
return os.path.exists(path_info)
75-
if self.work_tree and self.work_tree.exists(path_info):
76-
return True
77-
return self.repo.tree.exists(path_info)
69+
return self.work_tree.exists(path_info)
7870

7971
def isfile(self, path_info):
8072
if not self.repo:
8173
return os.path.isfile(path_info)
82-
if self.work_tree and self.work_tree.isfile(path_info):
83-
return True
84-
return self.repo.tree.isfile(path_info)
74+
return self.work_tree.isfile(path_info)
8575

8676
def isdir(self, path_info):
8777
if not self.repo:
8878
return os.path.isdir(path_info)
89-
if self.work_tree and self.work_tree.isdir(path_info):
90-
return True
91-
return self.repo.tree.isdir(path_info)
79+
return self.work_tree.isdir(path_info)
9280

9381
def iscopy(self, path_info):
9482
return not (
9583
System.is_symlink(path_info) or System.is_hardlink(path_info)
9684
)
9785

9886
def walk_files(self, path_info, **kwargs):
99-
if self.work_tree:
100-
tree = self.work_tree
101-
else:
102-
tree = self.repo.tree
103-
for fname in tree.walk_files(path_info):
87+
for fname in self.work_tree.walk_files(path_info):
10488
yield PathInfo(fname)
10589

10690
def is_empty(self, path_info):

dvc/repo/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,14 @@ def __init__(self, root_dir=None, scm=None, rev=None):
111111
friendly=True,
112112
)
113113

114+
self.cache = Cache(self)
115+
self.cloud = DataCloud(self)
116+
114117
if not scm:
115118
# NOTE: storing state and link_state in the repository itself to
116119
# avoid any possible state corruption in 'shared cache dir'
117120
# scenario.
118-
self.state = State(self)
119-
120-
self.cache = Cache(self)
121-
self.cloud = DataCloud(self)
121+
self.state = State(self.cache.local)
122122

123123
self.stage_cache = StageCache(self)
124124

dvc/state.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from urllib.parse import urlencode, urlunparse
88

99
from dvc.exceptions import DvcException
10-
from dvc.scm.tree import WorkingTree
1110
from dvc.utils import current_timestamp, relpath, to_chunks
1211
from dvc.utils.fs import get_inode, get_mtime_and_size, remove
1312

@@ -89,10 +88,11 @@ class State: # pylint: disable=too-many-instance-attributes
8988
MAX_INT = 2 ** 63 - 1
9089
MAX_UINT = 2 ** 64 - 2
9190

92-
def __init__(self, repo):
91+
def __init__(self, local_cache):
92+
repo = local_cache.repo
9393
self.repo = repo
9494
self.root_dir = repo.root_dir
95-
self.tree = WorkingTree(self.root_dir)
95+
self.tree = local_cache.tree.work_tree
9696

9797
state_config = repo.config.get("state", {})
9898
self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT)
@@ -394,6 +394,9 @@ def get(self, path_info):
394394
assert isinstance(path_info, str) or path_info.scheme == "local"
395395
path = os.fspath(path_info)
396396

397+
# NOTE: use os.path.exists instead of WorkingTree.exists
398+
# WorkingTree.exists uses lexists() and will return True for broken
399+
# symlinks that we cannot stat() in get_mtime_and_size
397400
if not os.path.exists(path):
398401
return None
399402

@@ -420,7 +423,7 @@ def save_link(self, path_info):
420423
"""
421424
assert isinstance(path_info, str) or path_info.scheme == "local"
422425

423-
if not os.path.exists(path_info):
426+
if not self.tree.exists(path_info):
424427
return
425428

426429
mtime, _ = get_mtime_and_size(path_info, self.tree)
@@ -446,7 +449,7 @@ def get_unused_links(self, used):
446449
inode = self._from_sqlite(inode)
447450
path = os.path.join(self.root_dir, relpath)
448451

449-
if path in used or not os.path.exists(path):
452+
if path in used or not self.tree.exists(path):
450453
continue
451454

452455
actual_inode = get_inode(path)

tests/func/test_ignore.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
DvcIgnorePatterns,
1111
DvcIgnoreRepo,
1212
)
13-
from dvc.remote import LocalRemote
1413
from dvc.repo import Repo
1514
from dvc.scm.tree import WorkingTree
1615
from dvc.utils import relpath
@@ -139,8 +138,7 @@ def test_match_nested(tmp_dir, dvc):
139138
}
140139
)
141140

142-
remote = LocalRemote(dvc, {})
143-
result = {os.fspath(f) for f in remote.tree.walk_files(".")}
141+
result = {os.fspath(os.path.normpath(f)) for f in dvc.tree.walk_files(".")}
144142
assert result == {".dvcignore", "foo"}
145143

146144

@@ -149,8 +147,7 @@ def test_ignore_external(tmp_dir, scm, dvc, tmp_path_factory):
149147
ext_dir = TmpDir(os.fspath(tmp_path_factory.mktemp("external_dir")))
150148
ext_dir.gen({"y.backup": "y", "tmp": "ext tmp"})
151149

152-
remote = LocalRemote(dvc, {})
153-
result = {relpath(f, ext_dir) for f in remote.tree.walk_files(ext_dir)}
150+
result = {relpath(f, ext_dir) for f in dvc.tree.walk_files(ext_dir)}
154151
assert result == {"y.backup", "tmp"}
155152

156153

tests/func/test_state.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def test_state(tmp_dir, dvc):
1313
path_info = PathInfo(path)
1414
md5 = file_md5(path)[0]
1515

16-
state = State(dvc)
16+
state = State(dvc.cache.local)
1717

1818
with state:
1919
state.save(path_info, md5)
@@ -57,7 +57,7 @@ def get_inode_mocked(path):
5757
def test_get_state_record_for_inode(get_inode_mock, tmp_dir, dvc):
5858
tmp_dir.gen("foo", "foo content")
5959

60-
state = State(dvc)
60+
state = State(dvc.cache.local)
6161
inode = state.MAX_INT + 2
6262
assert inode != state._to_sqlite(inode)
6363

0 commit comments

Comments
 (0)