Skip to content

Commit 33abc40

Browse files
authored
dvc: get rid of WorkingTree (#4216)
Working tree is really just a regular local tree and should be used by outputs when trying to compute a hash for themselves. We didn't use it previously because local tree was embeded into the local remote class. Related to #4050
1 parent 424ee7f commit 33abc40

File tree

10 files changed

+74
-115
lines changed

10 files changed

+74
-115
lines changed

dvc/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ class Config(dict):
234234
def __init__(
235235
self, dvc_dir=None, validate=True, tree=None,
236236
): # pylint: disable=super-init-not-called
237-
from dvc.scm.tree import WorkingTree
237+
from dvc.tree.local import LocalRemoteTree
238238

239239
self.dvc_dir = dvc_dir
240240

@@ -248,7 +248,7 @@ def __init__(
248248
else:
249249
self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))
250250

251-
self.wtree = WorkingTree(self.dvc_dir)
251+
self.wtree = LocalRemoteTree(None, {"url": self.dvc_dir})
252252
self.tree = tree.tree if tree else self.wtree
253253

254254
self.load(validate=validate)
@@ -326,7 +326,7 @@ def _save_config(self, level, conf_dict):
326326

327327
logger.debug(f"Writing '{filename}'.")
328328

329-
tree.makedirs(os.path.dirname(filename), exist_ok=True)
329+
tree.makedirs(os.path.dirname(filename))
330330

331331
config = configobj.ConfigObj(_pack_remotes(conf_dict))
332332
with tree.open(filename, "wb") as fobj:

dvc/repo/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,11 @@ def __init__(self, root_dir=None, scm=None, rev=None):
7777
from dvc.repo.metrics import Metrics
7878
from dvc.repo.plots import Plots
7979
from dvc.repo.params import Params
80-
from dvc.scm.tree import WorkingTree
80+
from dvc.tree.local import LocalRemoteTree
8181
from dvc.utils.fs import makedirs
8282
from dvc.stage.cache import StageCache
8383

8484
if scm:
85-
# use GitTree instead of WorkingTree as default repo tree instance
8685
tree = scm.get_tree(rev)
8786
self.root_dir = self.find_root(root_dir, tree)
8887
self.scm = scm
@@ -91,7 +90,7 @@ def __init__(self, root_dir=None, scm=None, rev=None):
9190
else:
9291
root_dir = self.find_root(root_dir)
9392
self.root_dir = os.path.abspath(os.path.realpath(root_dir))
94-
self.tree = WorkingTree(self.root_dir)
93+
self.tree = LocalRemoteTree(None, {"url": self.root_dir})
9594

9695
self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
9796
self.config = Config(self.dvc_dir, tree=self.tree)

dvc/repo/brancher.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from funcy import group_by
22

3-
from dvc.scm.tree import WorkingTree
3+
from dvc.tree.local import LocalRemoteTree
44

55

66
def brancher( # noqa: E302
@@ -29,7 +29,7 @@ def brancher( # noqa: E302
2929

3030
scm = self.scm
3131

32-
self.tree = WorkingTree(self.root_dir)
32+
self.tree = LocalRemoteTree(self, {"url": self.root_dir})
3333
yield "workspace"
3434

3535
if revs and "workspace" in revs:

dvc/scm/tree.py

Lines changed: 4 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
11
import os
2-
import stat
3-
from multiprocessing import cpu_count
4-
5-
from funcy import cached_property
62

73

84
class BaseTree:
@@ -41,63 +37,9 @@ def makedirs(self, path, mode=0o777, exist_ok=True):
4137
raise NotImplementedError
4238

4339

44-
class WorkingTree(BaseTree):
45-
"""Proxies the repo file access methods to working tree files"""
46-
47-
def __init__(self, repo_root=None):
48-
repo_root = repo_root or os.getcwd()
49-
self.repo_root = repo_root
50-
51-
@property
52-
def tree_root(self):
53-
return self.repo_root
54-
55-
def open(self, path, mode="r", encoding="utf-8"):
56-
"""Open file and return a stream."""
57-
if "b" in mode:
58-
encoding = None
59-
return open(path, mode=mode, encoding=encoding)
60-
61-
def exists(self, path):
62-
"""Test whether a path exists."""
63-
return os.path.lexists(path)
64-
65-
def isdir(self, path):
66-
"""Return true if the pathname refers to an existing directory."""
67-
return os.path.isdir(path)
68-
69-
def isfile(self, path):
70-
"""Test whether a path is a regular file"""
71-
return os.path.isfile(path)
72-
73-
def walk(self, top, topdown=True, onerror=None):
74-
"""Directory tree generator.
75-
76-
See `os.walk` for the docs. Differences:
77-
- no support for symlinks
78-
"""
79-
for root, dirs, files in os.walk(
80-
top, topdown=topdown, onerror=onerror
81-
):
82-
yield os.path.normpath(root), dirs, files
83-
84-
def isexec(self, path):
85-
mode = os.stat(path).st_mode
86-
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
87-
88-
@staticmethod
89-
def stat(path):
90-
return os.stat(path)
91-
92-
@cached_property
93-
def hash_jobs(self):
94-
return max(1, min(4, cpu_count() // 2))
95-
96-
def makedirs(self, path, mode=0o777, exist_ok=True):
97-
os.makedirs(path, mode=mode, exist_ok=exist_ok)
98-
99-
10040
def is_working_tree(tree):
101-
return isinstance(tree, WorkingTree) or isinstance(
102-
getattr(tree, "tree", None), WorkingTree
41+
from dvc.tree.local import LocalRemoteTree
42+
43+
return isinstance(tree, LocalRemoteTree) or isinstance(
44+
getattr(tree, "tree", None), LocalRemoteTree
10345
)

dvc/state.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,12 @@ class State: # pylint: disable=too-many-instance-attributes
8989
MAX_UINT = 2 ** 64 - 2
9090

9191
def __init__(self, local_cache):
92+
from dvc.tree.local import LocalRemoteTree
93+
9294
repo = local_cache.repo
9395
self.repo = repo
9496
self.root_dir = repo.root_dir
95-
self.tree = local_cache.tree.work_tree
97+
self.tree = LocalRemoteTree(None, {})
9698

9799
state_config = repo.config.get("state", {})
98100
self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT)
@@ -394,8 +396,8 @@ def get(self, path_info):
394396
assert isinstance(path_info, str) or path_info.scheme == "local"
395397
path = os.fspath(path_info)
396398

397-
# NOTE: use os.path.exists instead of WorkingTree.exists
398-
# WorkingTree.exists uses lexists() and will return True for broken
399+
# NOTE: use os.path.exists instead of LocalRemoteTree.exists
400+
# because it uses lexists() and will return True for broken
399401
# symlinks that we cannot stat() in get_mtime_and_size
400402
if not os.path.exists(path):
401403
return None

dvc/tree/local.py

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,11 @@
33
import os
44
import stat
55

6-
from funcy import cached_property
76
from shortuuid import uuid
87

98
from dvc.exceptions import DvcException
109
from dvc.path_info import PathInfo
1110
from dvc.scheme import Schemes
12-
from dvc.scm.tree import WorkingTree, is_working_tree
1311
from dvc.system import System
1412
from dvc.utils import file_md5, relpath, tmp_fname
1513
from dvc.utils.fs import (
@@ -42,21 +40,16 @@ def __init__(self, repo, config):
4240
url = config.get("url")
4341
self.path_info = self.PATH_CLS(url) if url else None
4442

43+
@property
44+
def tree_root(self):
45+
return self.config.get("url")
46+
4547
@property
4648
def state(self):
4749
from dvc.state import StateNoop
4850

4951
return self.repo.state if self.repo else StateNoop()
5052

51-
@cached_property
52-
def work_tree(self):
53-
# When using repo.brancher, repo.tree may change to/from WorkingTree to
54-
# GitTree arbitarily. When repo.tree is GitTree, local cache needs to
55-
# use its own WorkingTree instance.
56-
if self.repo:
57-
return WorkingTree(self.repo.root_dir)
58-
return None
59-
6053
@staticmethod
6154
def open(path_info, mode="r", encoding=None):
6255
return open(path_info, mode=mode, encoding=encoding)
@@ -65,26 +58,39 @@ def exists(self, path_info):
6558
assert isinstance(path_info, str) or path_info.scheme == "local"
6659
if not self.repo:
6760
return os.path.exists(path_info)
68-
return self.work_tree.exists(path_info)
61+
return os.path.lexists(path_info)
6962

7063
def isfile(self, path_info):
7164
if not self.repo:
7265
return os.path.isfile(path_info)
73-
return self.work_tree.isfile(path_info)
66+
return os.path.isfile(path_info)
7467

7568
def isdir(self, path_info):
7669
if not self.repo:
7770
return os.path.isdir(path_info)
78-
return self.work_tree.isdir(path_info)
71+
return os.path.isdir(path_info)
7972

8073
def iscopy(self, path_info):
8174
return not (
8275
System.is_symlink(path_info) or System.is_hardlink(path_info)
8376
)
8477

78+
def walk(self, top, topdown=True, onerror=None):
79+
"""Directory tree generator.
80+
81+
See `os.walk` for the docs. Differences:
82+
- no support for symlinks
83+
"""
84+
for root, dirs, files in os.walk(
85+
top, topdown=topdown, onerror=onerror
86+
):
87+
yield os.path.normpath(root), dirs, files
88+
8589
def walk_files(self, path_info, **kwargs):
86-
for fname in self.work_tree.walk_files(path_info):
87-
yield PathInfo(fname)
90+
for root, _, files in self.walk(path_info):
91+
for file in files:
92+
# NOTE: os.path.join is ~5.5 times slower
93+
yield PathInfo(f"{root}{os.sep}{file}")
8894

8995
def is_empty(self, path_info):
9096
path = path_info.fspath
@@ -111,6 +117,14 @@ def remove(self, path_info):
111117
def makedirs(self, path_info):
112118
makedirs(path_info, exist_ok=True, mode=self.dir_mode)
113119

120+
def isexec(self, path):
121+
mode = os.stat(path).st_mode
122+
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
123+
124+
@staticmethod
125+
def stat(path):
126+
return os.stat(path)
127+
114128
def move(self, from_info, to_info, mode=None):
115129
if from_info.scheme != "local" or to_info.scheme != "local":
116130
raise NotImplementedError
@@ -215,9 +229,7 @@ def _unprotect_file(self, path):
215229
os.chmod(path, self.file_mode)
216230

217231
def _unprotect_dir(self, path):
218-
assert is_working_tree(self.repo.tree)
219-
220-
for fname in self.repo.tree.walk_files(path):
232+
for fname in self.walk_files(path):
221233
self._unprotect_file(fname)
222234

223235
def unprotect(self, path_info):

dvc/utils/fs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_mtime_and_size(path, tree):
4444
raise
4545
continue
4646
size += stats.st_size
47-
files_mtimes[file_path] = stats.st_mtime
47+
files_mtimes[os.fspath(file_path)] = stats.st_mtime
4848

4949
# We track file changes and moves, which cannot be detected with simply
5050
# max(mtime(f) for f in non_ignored_files)

tests/func/test_ignore.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
DvcIgnorePatternsTrie,
1212
DvcIgnoreRepo,
1313
)
14+
from dvc.path_info import PathInfo
1415
from dvc.repo import Repo
15-
from dvc.scm.tree import WorkingTree
16+
from dvc.tree.local import LocalRemoteTree
1617
from dvc.utils import relpath
1718
from dvc.utils.fs import get_mtime_and_size
1819
from tests.dir_helpers import TmpDir
@@ -107,7 +108,8 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
107108
assert ignore_pattern_trie is not None
108109
assert (
109110
DvcIgnorePatterns.from_files(
110-
os.fspath(top_ignore_file), WorkingTree(dvc.root_dir)
111+
os.fspath(top_ignore_file),
112+
LocalRemoteTree(None, {"url": dvc.root_dir}),
111113
)
112114
== ignore_pattern_trie[os.fspath(ignore_file)]
113115
)
@@ -165,15 +167,15 @@ def test_ignore_subrepo(tmp_dir, scm, dvc):
165167
scm.commit("init parent dvcignore")
166168

167169
subrepo_dir = tmp_dir / "subdir"
168-
assert not dvc.tree.exists(subrepo_dir / "foo")
170+
assert not dvc.tree.exists(PathInfo(subrepo_dir / "foo"))
169171

170172
with subrepo_dir.chdir():
171173
subrepo = Repo.init(subdir=True)
172174
scm.add(str(subrepo_dir / "foo"))
173175
scm.commit("subrepo init")
174176

175177
for _ in subrepo.brancher(all_commits=True):
176-
assert subrepo.tree.exists(subrepo_dir / "foo")
178+
assert subrepo.tree.exists(PathInfo(subrepo_dir / "foo"))
177179

178180

179181
def test_ignore_blank_line(tmp_dir, dvc):

0 commit comments

Comments
 (0)