Skip to content

Commit 394f0fe

Browse files
committed
dvc: get rid of WorkingTree
Working tree is really just a regular local tree and should be used by outputs when trying to compute a hash for themselves. We didn't use it previously because the local tree was embedded into the local remote class. Related to #4050
1 parent 424ee7f commit 394f0fe

File tree

10 files changed

+73
-109
lines changed

10 files changed

+73
-109
lines changed

dvc/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ class Config(dict):
234234
def __init__(
235235
self, dvc_dir=None, validate=True, tree=None,
236236
): # pylint: disable=super-init-not-called
237-
from dvc.scm.tree import WorkingTree
237+
from dvc.tree.local import LocalRemoteTree
238238

239239
self.dvc_dir = dvc_dir
240240

@@ -248,7 +248,7 @@ def __init__(
248248
else:
249249
self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))
250250

251-
self.wtree = WorkingTree(self.dvc_dir)
251+
self.wtree = LocalRemoteTree(None, {"url": self.dvc_dir})
252252
self.tree = tree.tree if tree else self.wtree
253253

254254
self.load(validate=validate)
@@ -326,7 +326,7 @@ def _save_config(self, level, conf_dict):
326326

327327
logger.debug(f"Writing '{filename}'.")
328328

329-
tree.makedirs(os.path.dirname(filename), exist_ok=True)
329+
tree.makedirs(os.path.dirname(filename))
330330

331331
config = configobj.ConfigObj(_pack_remotes(conf_dict))
332332
with tree.open(filename, "wb") as fobj:

dvc/repo/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,11 @@ def __init__(self, root_dir=None, scm=None, rev=None):
7777
from dvc.repo.metrics import Metrics
7878
from dvc.repo.plots import Plots
7979
from dvc.repo.params import Params
80-
from dvc.scm.tree import WorkingTree
80+
from dvc.tree.local import LocalRemoteTree
8181
from dvc.utils.fs import makedirs
8282
from dvc.stage.cache import StageCache
8383

8484
if scm:
85-
# use GitTree instead of WorkingTree as default repo tree instance
8685
tree = scm.get_tree(rev)
8786
self.root_dir = self.find_root(root_dir, tree)
8887
self.scm = scm
@@ -91,7 +90,7 @@ def __init__(self, root_dir=None, scm=None, rev=None):
9190
else:
9291
root_dir = self.find_root(root_dir)
9392
self.root_dir = os.path.abspath(os.path.realpath(root_dir))
94-
self.tree = WorkingTree(self.root_dir)
93+
self.tree = LocalRemoteTree(None, {"url": self.root_dir})
9594

9695
self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
9796
self.config = Config(self.dvc_dir, tree=self.tree)

dvc/repo/brancher.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from funcy import group_by
22

3-
from dvc.scm.tree import WorkingTree
3+
from dvc.tree.local import LocalRemoteTree
44

55

66
def brancher( # noqa: E302
@@ -29,7 +29,7 @@ def brancher( # noqa: E302
2929

3030
scm = self.scm
3131

32-
self.tree = WorkingTree(self.root_dir)
32+
self.tree = LocalRemoteTree(self, {"url": self.root_dir})
3333
yield "workspace"
3434

3535
if revs and "workspace" in revs:

dvc/scm/tree.py

Lines changed: 3 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -41,63 +41,8 @@ def makedirs(self, path, mode=0o777, exist_ok=True):
4141
raise NotImplementedError
4242

4343

44-
class WorkingTree(BaseTree):
45-
"""Proxies the repo file access methods to working tree files"""
46-
47-
def __init__(self, repo_root=None):
48-
repo_root = repo_root or os.getcwd()
49-
self.repo_root = repo_root
50-
51-
@property
52-
def tree_root(self):
53-
return self.repo_root
54-
55-
def open(self, path, mode="r", encoding="utf-8"):
56-
"""Open file and return a stream."""
57-
if "b" in mode:
58-
encoding = None
59-
return open(path, mode=mode, encoding=encoding)
60-
61-
def exists(self, path):
62-
"""Test whether a path exists."""
63-
return os.path.lexists(path)
64-
65-
def isdir(self, path):
66-
"""Return true if the pathname refers to an existing directory."""
67-
return os.path.isdir(path)
68-
69-
def isfile(self, path):
70-
"""Test whether a path is a regular file"""
71-
return os.path.isfile(path)
72-
73-
def walk(self, top, topdown=True, onerror=None):
74-
"""Directory tree generator.
75-
76-
See `os.walk` for the docs. Differences:
77-
- no support for symlinks
78-
"""
79-
for root, dirs, files in os.walk(
80-
top, topdown=topdown, onerror=onerror
81-
):
82-
yield os.path.normpath(root), dirs, files
83-
84-
def isexec(self, path):
85-
mode = os.stat(path).st_mode
86-
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
87-
88-
@staticmethod
89-
def stat(path):
90-
return os.stat(path)
91-
92-
@cached_property
93-
def hash_jobs(self):
94-
return max(1, min(4, cpu_count() // 2))
95-
96-
def makedirs(self, path, mode=0o777, exist_ok=True):
97-
os.makedirs(path, mode=mode, exist_ok=exist_ok)
98-
99-
10044
def is_working_tree(tree):
101-
return isinstance(tree, WorkingTree) or isinstance(
102-
getattr(tree, "tree", None), WorkingTree
45+
from dvc.tree.local import LocalRemoteTree
46+
return isinstance(tree, LocalRemoteTree) or isinstance(
47+
getattr(tree, "tree", None), LocalRemoteTree
10348
)

dvc/state.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from dvc.utils import current_timestamp, relpath, to_chunks
1111
from dvc.utils.fs import get_inode, get_mtime_and_size, remove
1212

13+
1314
SQLITE_MAX_VARIABLES_NUMBER = 999
1415

1516
logger = logging.getLogger(__name__)
@@ -89,10 +90,11 @@ class State: # pylint: disable=too-many-instance-attributes
8990
MAX_UINT = 2 ** 64 - 2
9091

9192
def __init__(self, local_cache):
93+
from dvc.tree.local import LocalRemoteTree
9294
repo = local_cache.repo
9395
self.repo = repo
9496
self.root_dir = repo.root_dir
95-
self.tree = local_cache.tree.work_tree
97+
self.tree = LocalRemoteTree(None, {})
9698

9799
state_config = repo.config.get("state", {})
98100
self.row_limit = state_config.get("row_limit", self.STATE_ROW_LIMIT)
@@ -394,8 +396,8 @@ def get(self, path_info):
394396
assert isinstance(path_info, str) or path_info.scheme == "local"
395397
path = os.fspath(path_info)
396398

397-
# NOTE: use os.path.exists instead of WorkingTree.exists
398-
# WorkingTree.exists uses lexists() and will return True for broken
399+
# NOTE: use os.path.exists instead of LocalRemoteTree.exists
400+
# because it uses lexists() and will return True for broken
399401
# symlinks that we cannot stat() in get_mtime_and_size
400402
if not os.path.exists(path):
401403
return None

dvc/tree/local.py

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from dvc.exceptions import DvcException
1010
from dvc.path_info import PathInfo
1111
from dvc.scheme import Schemes
12-
from dvc.scm.tree import WorkingTree, is_working_tree
1312
from dvc.system import System
1413
from dvc.utils import file_md5, relpath, tmp_fname
1514
from dvc.utils.fs import (
@@ -42,21 +41,16 @@ def __init__(self, repo, config):
4241
url = config.get("url")
4342
self.path_info = self.PATH_CLS(url) if url else None
4443

44+
@property
45+
def tree_root(self):
46+
return self.config.get("url")
47+
4548
@property
4649
def state(self):
4750
from dvc.state import StateNoop
4851

4952
return self.repo.state if self.repo else StateNoop()
5053

51-
@cached_property
52-
def work_tree(self):
53-
# When using repo.brancher, repo.tree may change to/from WorkingTree to
54-
# GitTree arbitarily. When repo.tree is GitTree, local cache needs to
55-
# use its own WorkingTree instance.
56-
if self.repo:
57-
return WorkingTree(self.repo.root_dir)
58-
return None
59-
6054
@staticmethod
6155
def open(path_info, mode="r", encoding=None):
6256
return open(path_info, mode=mode, encoding=encoding)
@@ -65,26 +59,39 @@ def exists(self, path_info):
6559
assert isinstance(path_info, str) or path_info.scheme == "local"
6660
if not self.repo:
6761
return os.path.exists(path_info)
68-
return self.work_tree.exists(path_info)
62+
return os.path.lexists(path_info)
6963

7064
def isfile(self, path_info):
7165
if not self.repo:
7266
return os.path.isfile(path_info)
73-
return self.work_tree.isfile(path_info)
67+
return os.path.isfile(path_info)
7468

7569
def isdir(self, path_info):
7670
if not self.repo:
7771
return os.path.isdir(path_info)
78-
return self.work_tree.isdir(path_info)
72+
return os.path.isdir(path_info)
7973

8074
def iscopy(self, path_info):
8175
return not (
8276
System.is_symlink(path_info) or System.is_hardlink(path_info)
8377
)
8478

79+
def walk(self, top, topdown=True, onerror=None):
80+
"""Directory tree generator.
81+
82+
See `os.walk` for the docs. Differences:
83+
- no support for symlinks
84+
"""
85+
for root, dirs, files in os.walk(
86+
top, topdown=topdown, onerror=onerror
87+
):
88+
yield os.path.normpath(root), dirs, files
89+
8590
def walk_files(self, path_info, **kwargs):
86-
for fname in self.work_tree.walk_files(path_info):
87-
yield PathInfo(fname)
91+
for root, _, files in self.walk(path_info):
92+
for file in files:
93+
# NOTE: os.path.join is ~5.5 times slower
94+
yield PathInfo(f"{root}{os.sep}{file}")
8895

8996
def is_empty(self, path_info):
9097
path = path_info.fspath
@@ -111,6 +118,14 @@ def remove(self, path_info):
111118
def makedirs(self, path_info):
112119
makedirs(path_info, exist_ok=True, mode=self.dir_mode)
113120

121+
def isexec(self, path):
122+
mode = os.stat(path).st_mode
123+
return mode & (stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
124+
125+
@staticmethod
126+
def stat(path):
127+
return os.stat(path)
128+
114129
def move(self, from_info, to_info, mode=None):
115130
if from_info.scheme != "local" or to_info.scheme != "local":
116131
raise NotImplementedError
@@ -215,9 +230,7 @@ def _unprotect_file(self, path):
215230
os.chmod(path, self.file_mode)
216231

217232
def _unprotect_dir(self, path):
218-
assert is_working_tree(self.repo.tree)
219-
220-
for fname in self.repo.tree.walk_files(path):
233+
for fname in self.walk_files(path):
221234
self._unprotect_file(fname)
222235

223236
def unprotect(self, path_info):

dvc/utils/fs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def get_mtime_and_size(path, tree):
4444
raise
4545
continue
4646
size += stats.st_size
47-
files_mtimes[file_path] = stats.st_mtime
47+
files_mtimes[os.fspath(file_path)] = stats.st_mtime
4848

4949
# We track file changes and moves, which cannot be detected with simply
5050
# max(mtime(f) for f in non_ignored_files)

tests/func/test_ignore.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212
DvcIgnoreRepo,
1313
)
1414
from dvc.repo import Repo
15-
from dvc.scm.tree import WorkingTree
1615
from dvc.utils import relpath
1716
from dvc.utils.fs import get_mtime_and_size
17+
from dvc.path_info import PathInfo
18+
from dvc.tree.local import LocalRemoteTree
1819
from tests.dir_helpers import TmpDir
1920
from tests.utils import to_posixpath
2021

@@ -107,7 +108,7 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname):
107108
assert ignore_pattern_trie is not None
108109
assert (
109110
DvcIgnorePatterns.from_files(
110-
os.fspath(top_ignore_file), WorkingTree(dvc.root_dir)
111+
os.fspath(top_ignore_file), LocalRemoteTree(None, {"url": dvc.root_dir})
111112
)
112113
== ignore_pattern_trie[os.fspath(ignore_file)]
113114
)
@@ -165,15 +166,15 @@ def test_ignore_subrepo(tmp_dir, scm, dvc):
165166
scm.commit("init parent dvcignore")
166167

167168
subrepo_dir = tmp_dir / "subdir"
168-
assert not dvc.tree.exists(subrepo_dir / "foo")
169+
assert not dvc.tree.exists(PathInfo(subrepo_dir / "foo"))
169170

170171
with subrepo_dir.chdir():
171172
subrepo = Repo.init(subdir=True)
172173
scm.add(str(subrepo_dir / "foo"))
173174
scm.commit("subrepo init")
174175

175176
for _ in subrepo.brancher(all_commits=True):
176-
assert subrepo.tree.exists(subrepo_dir / "foo")
177+
assert subrepo.tree.exists(PathInfo(subrepo_dir / "foo"))
177178

178179

179180
def test_ignore_blank_line(tmp_dir, dvc):

tests/func/test_tree.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77
from dvc.repo.tree import RepoTree
88
from dvc.scm import SCM
99
from dvc.scm.git import GitTree
10-
from dvc.scm.tree import WorkingTree
10+
from dvc.tree.local import LocalRemoteTree
1111
from dvc.utils.fs import remove
1212
from tests.basic_env import TestDir, TestGit, TestGitSubmodule
1313

1414

15-
class TestWorkingTree(TestDir):
15+
class TestLocalRemoteTree(TestDir):
1616
def setUp(self):
1717
super().setUp()
18-
self.tree = WorkingTree()
18+
self.tree = LocalRemoteTree(None, {})
1919

2020
def test_open(self):
2121
with self.tree.open(self.FOO) as fd:
@@ -109,7 +109,7 @@ def convert_to_sets(walk_results):
109109

110110
class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir):
111111
def test(self):
112-
tree = WorkingTree(self._root_dir)
112+
tree = LocalRemoteTree(None, {"url": self._root_dir})
113113
self.assertWalkEqual(
114114
tree.walk(self._root_dir),
115115
[
@@ -128,7 +128,7 @@ def test(self):
128128
)
129129

130130
def test_subdir(self):
131-
tree = WorkingTree(self._root_dir)
131+
tree = LocalRemoteTree(None, {"url": self._root_dir})
132132
self.assertWalkEqual(
133133
tree.walk(join("data_dir", "data_sub_dir")),
134134
[(join("data_dir", "data_sub_dir"), [], ["data_sub"])],
@@ -137,7 +137,7 @@ def test_subdir(self):
137137

138138
class TestWalkInGit(AssertWalkEqualMixin, TestGit):
139139
def test_nobranch(self):
140-
tree = CleanTree(WorkingTree(self._root_dir))
140+
tree = CleanTree(LocalRemoteTree(None, {"url": self._root_dir}))
141141
self.assertWalkEqual(
142142
tree.walk("."),
143143
[
@@ -224,20 +224,24 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
224224

225225

226226
def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):
227+
from dvc.path_info import PathInfo
228+
227229
tmp_dir.gen({"subdir": {}})
228230
subrepo_dir = tmp_dir / "subdir"
229231
with subrepo_dir.chdir():
230232
subrepo = Repo.init(subdir=True)
231233
subrepo_dir.gen({"foo": "foo", "dir": {"bar": "bar"}})
232234

235+
path = PathInfo(subrepo_dir)
236+
233237
assert isinstance(dvc.tree, CleanTree)
234-
assert not dvc.tree.exists(subrepo_dir / "foo")
235-
assert not dvc.tree.isfile(subrepo_dir / "foo")
236-
assert not dvc.tree.exists(subrepo_dir / "dir")
237-
assert not dvc.tree.isdir(subrepo_dir / "dir")
238+
assert not dvc.tree.exists(path / "foo")
239+
assert not dvc.tree.isfile(path / "foo")
240+
assert not dvc.tree.exists(path / "dir")
241+
assert not dvc.tree.isdir(path / "dir")
238242

239243
assert isinstance(subrepo.tree, CleanTree)
240-
assert subrepo.tree.exists(subrepo_dir / "foo")
241-
assert subrepo.tree.isfile(subrepo_dir / "foo")
242-
assert subrepo.tree.exists(subrepo_dir / "dir")
243-
assert subrepo.tree.isdir(subrepo_dir / "dir")
244+
assert subrepo.tree.exists(path / "foo")
245+
assert subrepo.tree.isfile(path / "foo")
246+
assert subrepo.tree.exists(path / "dir")
247+
assert subrepo.tree.isdir(path / "dir")

0 commit comments

Comments
 (0)