diff --git a/dvc/ignore.py b/dvc/ignore.py index eefa3bfef8..384ba38bd0 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -5,7 +5,9 @@ from pathspec import PathSpec from pathspec.patterns import GitWildMatchPattern +from dvc.path_info import PathInfo from dvc.scm.tree import BaseTree +from dvc.utils import relpath logger = logging.getLogger(__name__) @@ -125,19 +127,79 @@ def tree_root(self): return self.tree.tree_root def open(self, path, mode="r", encoding="utf-8"): - return self.tree.open(path, mode, encoding) + if self.isfile(path): + return self.tree.open(path, mode, encoding) + raise FileNotFoundError def exists(self, path): - return self.tree.exists(path) + if self.tree.exists(path) and self._parents_exist(path): + if self.tree.isdir(path): + return self._valid_dirname(path) + return self._valid_filename(path) + return False def isdir(self, path): - return self.tree.isdir(path) + return ( + self.tree.isdir(path) + and self._parents_exist(path) + and self._valid_dirname(path) + ) + + def _valid_dirname(self, path): + dirname, basename = os.path.split(os.path.normpath(path)) + dirs, _ = self.dvcignore(os.path.abspath(dirname), [basename], []) + if dirs: + return True + return False def isfile(self, path): - return self.tree.isfile(path) + return ( + self.tree.isfile(path) + and self._parents_exist(path) + and self._valid_filename(path) + ) + + def _valid_filename(self, path): + dirname, basename = os.path.split(os.path.normpath(path)) + _, files = self.dvcignore(os.path.abspath(dirname), [], [basename]) + if files: + return True + return False def isexec(self, path): - return self.tree.isexec(path) + return self.exists(path) and self.tree.isexec(path) + + def _parents_exist(self, path): + from dvc.repo import Repo + + path = PathInfo(path) + + # if parent is tree_root or inside a .dvc dir we can skip this check + if path.parent == self.tree_root or Repo.DVC_DIR in path.parts: + return True + + # if path is outside of tree, assume this is a local remote/local cache + # link/move operation where we do not need to filter ignores + path = relpath(path, self.tree_root) + if path.startswith("..") or ( + os.name == "nt" + and not os.path.commonprefix( + [os.path.abspath(path), self.tree_root] + ) + ): + return True + + # check if parent directories are in our ignores, starting from + # tree_root + for parent_dir in reversed(PathInfo(path).parents): + dirname, basename = os.path.split(parent_dir) + if basename == ".": + # parent_dir == tree_root + continue + dirs, _ = self.dvcignore(os.path.abspath(dirname), [basename], []) + if not dirs: + return False + return True def walk(self, top, topdown=True): for root, dirs, files in self.tree.walk(top, topdown): @@ -148,4 +210,6 @@ def walk(self, top, topdown=True): yield root, dirs, files def stat(self, path): - return self.tree.stat(path) + if self.exists(path): + return self.tree.stat(path) + raise FileNotFoundError diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 64bb733aa8..1df0000d5e 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -3,6 +3,7 @@ from dvc.ignore import CleanTree from dvc.path_info import PathInfo +from dvc.repo import Repo from dvc.repo.tree import RepoTree from dvc.scm import SCM from dvc.scm.git import GitTree @@ -220,3 +221,23 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote): cache.save(PathInfo(erepo_dir / "dir"), None, tree=tree) for checksum in expected: assert os.path.exists(cache.checksum_to_path_info(checksum)) + + +def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch): + tmp_dir.gen({"subdir": {}}) + subrepo_dir = tmp_dir / "subdir" + with subrepo_dir.chdir(): + subrepo = Repo.init(subdir=True) + subrepo_dir.gen({"foo": "foo", "dir": {"bar": "bar"}}) + + assert isinstance(dvc.tree, CleanTree) + assert not dvc.tree.exists(subrepo_dir / "foo") + assert not dvc.tree.isfile(subrepo_dir / "foo") + assert not dvc.tree.exists(subrepo_dir / "dir") + assert not dvc.tree.isdir(subrepo_dir / "dir") + + assert isinstance(subrepo.tree, CleanTree) + assert subrepo.tree.exists(subrepo_dir / "foo") + assert subrepo.tree.isfile(subrepo_dir / "foo") + assert subrepo.tree.exists(subrepo_dir / "dir") + assert subrepo.tree.isdir(subrepo_dir / "dir")