Skip to content

Commit 60d83a7

Browse files
dvcignore: Fix incorrect ignored output computation (#4986)
* dvcignore: Fix incorrect ignored output computation * dvcignore: Fixed unignored patterns Unignored patterns were shown in the dvc status output and caused Error. Also made a bit of a refactoring in `matches` and `ignore`. * dvcignore: Fix functional tests Not sure if I follow, but tests assume that unignored files should trigger `check-ignore`. What was the intention? * dvcignore: Fix stage.wdir being MagicMock * Use `is_ignored` instead of `check_ignore` 1. use is_ignored to judge DVC ignore status. 2. use check_ignore to show ignore patterns. * Solve tests fails * Add a new tests * Return to the old check_ignore logic. * Pass the pylint * Simplify ignore tests Co-authored-by: karajan1001 <[email protected]>
1 parent 689ae22 commit 60d83a7

File tree

5 files changed

+66
-35
lines changed

5 files changed

+66
-35
lines changed

dvc/ignore.py

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -91,45 +91,50 @@ def _get_normalize_path(self, dirname, basename):
9191
path = normalize_file(path)
9292
return path
9393

94-
def matches(self, dirname, basename, is_dir=False):
94+
def matches(self, dirname, basename, is_dir=False, details: bool = False):
9595
path = self._get_normalize_path(dirname, basename)
9696
if not path:
9797
return False
98+
99+
if details:
100+
return self._ignore_details(path, is_dir)
98101
return self.ignore(path, is_dir)
99102

100103
def ignore(self, path, is_dir):
104+
def matches(pattern, path, is_dir) -> bool:
105+
matches_ = bool(pattern.match(path))
106+
107+
if is_dir:
108+
matches_ |= bool(pattern.match(f"{path}/"))
109+
110+
return matches_
111+
101112
result = False
102-
if is_dir:
103-
path_dir = f"{path}/"
104-
for ignore, pattern in self.ignore_spec:
105-
if pattern.match(path) or pattern.match(path_dir):
106-
result = ignore
107-
else:
108-
for ignore, pattern in self.ignore_spec:
109-
if pattern.match(path):
110-
result = ignore
111-
return result
112113

113-
def match_details(self, dirname, basename, is_dir=False):
114-
path = self._get_normalize_path(dirname, basename)
115-
if not path:
116-
return False
117-
return self._ignore_details(path, is_dir)
114+
for ignore, pattern in self.ignore_spec[::-1]:
115+
if matches(pattern, path, is_dir):
116+
result = ignore
117+
break
118+
return result
118119

119-
def _ignore_details(self, path, is_dir):
120+
def _ignore_details(self, path, is_dir: bool):
120121
result = []
121-
for ignore, pattern in zip(self.regex_pattern_list, self.pattern_list):
122-
regex = re.compile(ignore[0])
122+
for (regex, _), pattern_info in list(
123+
zip(self.regex_pattern_list, self.pattern_list)
124+
):
123125
# skip system pattern
124-
if not pattern.file_info:
126+
if not pattern_info.file_info:
125127
continue
128+
129+
regex = re.compile(regex)
130+
131+
matches = bool(regex.match(path))
126132
if is_dir:
127-
path_dir = f"{path}/"
128-
if regex.match(path) or regex.match(path_dir):
129-
result.append(pattern.file_info)
130-
else:
131-
if regex.match(path):
132-
result.append(pattern.file_info)
133+
matches |= bool(regex.match(f"{path}/"))
134+
135+
if matches:
136+
result.append(pattern_info.file_info)
137+
133138
return result
134139

135140
def __hash__(self):
@@ -323,13 +328,15 @@ def _outside_repo(self, path):
323328
return False
324329

325330
def check_ignore(self, target):
331+
# NOTE: can only be used in `dvc check-ignore`, see
332+
# https://github.com/iterative/dvc/issues/5046
326333
full_target = os.path.abspath(target)
327334
if not self._outside_repo(full_target):
328335
dirname, basename = os.path.split(os.path.normpath(full_target))
329336
pattern = self._get_trie_pattern(dirname)
330337
if pattern:
331-
matches = pattern.match_details(
332-
dirname, basename, os.path.isdir(full_target)
338+
matches = pattern.matches(
339+
dirname, basename, os.path.isdir(full_target), True,
333340
)
334341

335342
if matches:
@@ -339,7 +346,11 @@ def check_ignore(self, target):
339346
def is_ignored(self, path):
340347
# NOTE: can't use self.check_ignore(path).match for now, see
341348
# https://github.com/iterative/dvc/issues/4555
342-
return self.is_ignored_dir(path) or self.is_ignored_file(path)
349+
if os.path.isfile(path):
350+
return self.is_ignored_file(path)
351+
if os.path.isdir(path):
352+
return self.is_ignored_dir(path)
353+
return self.is_ignored_file(path) or self.is_ignored_dir(path)
343354

344355

345356
def init(path):

dvc/output/base.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,9 @@ def _validate_output_path(cls, path, stage=None):
559559
raise cls.IsStageFileError(path)
560560

561561
if stage:
562-
check = stage.repo.tree.dvcignore.check_ignore(path)
563-
if check.match:
562+
abs_path = os.path.join(stage.wdir, path)
563+
if stage.repo.tree.dvcignore.is_ignored(abs_path):
564+
check = stage.repo.tree.dvcignore.check_ignore(abs_path)
564565
raise cls.IsIgnoredError(check)
565566

566567
def _check_can_merge(self, out):

tests/dir_helpers.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,10 +332,14 @@ def run_copy(tmp_dir, dvc):
332332
)
333333

334334
def run_copy(src, dst, **run_kwargs):
335+
wdir = pathlib.Path(run_kwargs.get("wdir", "."))
336+
wdir = pathlib.Path("../" * len(wdir.parts))
337+
script_path = wdir / "copy.py"
338+
335339
return dvc.run(
336-
cmd=f"python copy.py {src} {dst}",
340+
cmd=f"python {script_path} {src} {dst}",
337341
outs=[dst],
338-
deps=[src, "copy.py"],
342+
deps=[src, f"{script_path}"],
339343
**run_kwargs,
340344
)
341345

tests/func/test_ignore.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import shutil
3+
from pathlib import Path
34

45
import pytest
56

@@ -404,7 +405,17 @@ def test_ignore_in_added_dir(tmp_dir, dvc):
404405

405406

406407
def test_ignored_output(tmp_dir, scm, dvc, run_copy):
407-
tmp_dir.gen({".dvcignore": "*.log", "foo": "foo content"})
408+
tmp_dir.gen({".dvcignore": "*.log\n!foo.log", "foo": "foo content"})
408409

409410
with pytest.raises(OutputIsIgnoredError):
410-
run_copy("foo", "foo.log", name="copy")
411+
run_copy("foo", "abc.log", name="copy")
412+
413+
run_copy("foo", "foo.log", name="copy")
414+
415+
416+
def test_ignored_output_nested(tmp_dir, scm, dvc, run_copy):
417+
tmp_dir.gen({".dvcignore": "/*.log", "copy": {"foo": "foo content"}})
418+
419+
run_copy("foo", "foo.log", name="copy", wdir="copy")
420+
421+
assert Path("copy/foo.log").exists()

tests/unit/output/test_output.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def test_get_used_cache(exists, expected_message, mocker, caplog):
7373
stage = mocker.MagicMock()
7474
mocker.patch.object(stage, "__str__", return_value="stage: 'stage.dvc'")
7575
mocker.patch.object(stage, "addressing", "stage.dvc")
76+
mocker.patch.object(stage, "wdir", ".")
77+
mocker.patch.object(
78+
stage.repo.tree.dvcignore, "is_ignored", return_value=False,
79+
)
7680
mocker.patch.object(
7781
stage.repo.tree.dvcignore,
7882
"check_ignore",

0 commit comments

Comments
 (0)