Skip to content

Commit 10f2bb9

Browse files
authored
ignore: CleanTree should always check ignores, not just for walk() (#3876)
* tests: test that CleanTree works outside of walk()
* ignore: CleanTree should always check ignores, not just for walk()
* CleanTree: handle broken symlinks
* handle case for linking from shared local cache dirs
* Fix Windows relpath/different drive issue
1 parent 2fecc66 commit 10f2bb9

File tree

2 files changed

+91
-6
lines changed

2 files changed

+91
-6
lines changed

dvc/ignore.py

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from pathspec import PathSpec
66
from pathspec.patterns import GitWildMatchPattern
77

8+
from dvc.path_info import PathInfo
89
from dvc.scm.tree import BaseTree
10+
from dvc.utils import relpath
911

1012
logger = logging.getLogger(__name__)
1113

@@ -125,19 +127,79 @@ def tree_root(self):
125127
return self.tree.tree_root
126128

127129
def open(self, path, mode="r", encoding="utf-8"):
    """Open ``path`` through the wrapped tree if it is a visible file.

    The path is first checked with ``self.isfile``; only when that check
    passes is the call forwarded to ``self.tree.open``.

    Raises:
        FileNotFoundError: if ``self.isfile(path)`` is falsy. The exception
            carries ``errno.ENOENT`` and the offending path so callers and
            logs can tell which file was rejected.
    """
    import errno

    if self.isfile(path):
        return self.tree.open(path, mode, encoding)
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
129133

130134
def exists(self, path):
    """Return whether ``path`` exists in the wrapped tree and is not ignored.

    The path must exist in ``self.tree`` and pass ``self._parents_exist``.
    It is then validated as a directory name or a file name, depending on
    what ``self.tree.isdir`` reports for it.
    """
    if not (self.tree.exists(path) and self._parents_exist(path)):
        return False
    if self.tree.isdir(path):
        return self._valid_dirname(path)
    return self._valid_filename(path)
132140

133141
def isdir(self, path):
    """Return a truthy value if ``path`` is a directory that is not ignored.

    All three checks must pass, evaluated in order with short-circuiting:
    the wrapped tree reports a directory, ``self._parents_exist`` accepts
    the path, and ``self._valid_dirname`` accepts the directory itself.
    """
    return (
        self.tree.isdir(path)
        and self._parents_exist(path)
        and self._valid_dirname(path)
    )
147+
148+
def _valid_dirname(self, path):
    """Return True if the directory ``path`` survives the ignore filter.

    The normalized path is split into its parent and final component; the
    final component is passed to ``self.dvcignore`` as the only directory
    candidate under the absolute parent. The directory is valid when the
    returned directory list is non-empty.
    """
    dirname, basename = os.path.split(os.path.normpath(path))
    dirs, _ = self.dvcignore(os.path.abspath(dirname), [basename], [])
    return bool(dirs)
135154

136155
def isfile(self, path):
    """Return a truthy value if ``path`` is a file that is not ignored.

    All three checks must pass, evaluated in order with short-circuiting:
    the wrapped tree reports a file, ``self._parents_exist`` accepts the
    path, and ``self._valid_filename`` accepts the file itself.
    """
    return (
        self.tree.isfile(path)
        and self._parents_exist(path)
        and self._valid_filename(path)
    )
161+
162+
def _valid_filename(self, path):
    """Return True if the file ``path`` survives the ignore filter.

    The normalized path is split into its parent and final component; the
    final component is passed to ``self.dvcignore`` as the only file
    candidate under the absolute parent. The file is valid when the
    returned file list is non-empty.
    """
    dirname, basename = os.path.split(os.path.normpath(path))
    _, files = self.dvcignore(os.path.abspath(dirname), [], [basename])
    return bool(files)
138168

139169
def isexec(self, path):
    """Return a truthy value if ``path`` is visible and executable.

    ``self.exists`` is consulted first, so the wrapped tree's ``isexec`` is
    only asked about paths that pass the existence/ignore checks.
    """
    return self.exists(path) and self.tree.isexec(path)
171+
172+
def _parents_exist(self, path):
    """Return False if any parent directory of ``path`` is ignored.

    Returns True without consulting the ignore filter when the path sits
    directly under ``self.tree_root``, when ``Repo.DVC_DIR`` is one of its
    components, or when it lies outside ``self.tree_root``. Otherwise each
    parent directory is checked with ``self.dvcignore``, starting from the
    one closest to ``self.tree_root``.
    """
    from dvc.repo import Repo

    path = PathInfo(path)

    # if parent is tree_root or inside a .dvc dir we can skip this check
    if path.parent == self.tree_root or Repo.DVC_DIR in path.parts:
        return True

    # if path is outside of tree, assume this is a local remote/local cache
    # link/move operation where we do not need to filter ignores
    path = relpath(path, self.tree_root)
    # Compare whole path components: a plain startswith("..") would also
    # match in-tree names such as "..cache/file" and skip their checks.
    outside_tree = path == os.pardir or path.startswith(os.pardir + os.sep)
    if outside_tree or (
        os.name == "nt"
        and not os.path.commonprefix(
            [os.path.abspath(path), self.tree_root]
        )
    ):
        return True

    # check if parent directories are in our ignores, starting from
    # tree_root
    for parent_dir in reversed(PathInfo(path).parents):
        dirname, basename = os.path.split(parent_dir)
        if basename == ".":
            # parent_dir == tree_root
            continue
        # parent_dir is relative to tree_root, so anchor it there rather
        # than at the current working directory.
        root = os.path.abspath(os.path.join(self.tree_root, dirname))
        dirs, _ = self.dvcignore(root, [basename], [])
        if not dirs:
            return False
    return True
141203

142204
def walk(self, top, topdown=True):
143205
for root, dirs, files in self.tree.walk(top, topdown):
@@ -148,4 +210,6 @@ def walk(self, top, topdown=True):
148210
yield root, dirs, files
149211

150212
def stat(self, path):
    """Return the wrapped tree's stat result for a visible ``path``.

    Raises:
        FileNotFoundError: if ``self.exists(path)`` is falsy. The exception
            carries ``errno.ENOENT`` and the offending path so callers and
            logs can tell which path was rejected.
    """
    import errno

    if self.exists(path):
        return self.tree.stat(path)
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)

tests/func/test_tree.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from dvc.ignore import CleanTree
55
from dvc.path_info import PathInfo
6+
from dvc.repo import Repo
67
from dvc.repo.tree import RepoTree
78
from dvc.scm import SCM
89
from dvc.scm.git import GitTree
@@ -220,3 +221,23 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote):
220221
cache.save(PathInfo(erepo_dir / "dir"), None, tree=tree)
221222
for checksum in expected:
222223
assert os.path.exists(cache.checksum_to_path_info(checksum))
224+
225+
226+
def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):
    """CleanTree hides a subrepo's contents from the parent repo's tree.

    A subrepo is initialised in ``subdir`` and populated with a file and a
    directory. The parent repo's tree must not report them, while the
    subrepo's own tree must.
    """
    tmp_dir.gen({"subdir": {}})
    subrepo_dir = tmp_dir / "subdir"
    with subrepo_dir.chdir():
        subrepo = Repo.init(subdir=True)
    subrepo_dir.gen({"foo": "foo", "dir": {"bar": "bar"}})

    # Parent repo: everything under the subrepo is filtered out.
    assert isinstance(dvc.tree, CleanTree)
    assert not dvc.tree.exists(subrepo_dir / "foo")
    assert not dvc.tree.isfile(subrepo_dir / "foo")
    assert not dvc.tree.exists(subrepo_dir / "dir")
    assert not dvc.tree.isdir(subrepo_dir / "dir")

    # Subrepo: the same paths are visible through its own tree.
    assert isinstance(subrepo.tree, CleanTree)
    assert subrepo.tree.exists(subrepo_dir / "foo")
    assert subrepo.tree.isfile(subrepo_dir / "foo")
    assert subrepo.tree.exists(subrepo_dir / "dir")
    assert subrepo.tree.isdir(subrepo_dir / "dir")

0 commit comments

Comments
 (0)