Skip to content

Commit 11d906e

Browse files
karajan1001, efiop, and jorgeorpinel
authored
dvc check-ignore command (#4282)
* Update some tests first fix #3736 * first edition * solve failure in Windows * For Windows CI * Some help documents issue. * Update dvc/ignore.py abspath Co-authored-by: Ruslan Kuprieiev <[email protected]> * Refactor with OutOfWorkSpaceError * Solve a bug * Update dvc/command/check_ignore.py Co-authored-by: Jorge Orpinel <[email protected]> * Update dvc/command/check_ignore.py Co-authored-by: Jorge Orpinel <[email protected]> * Update dvc/command/check_ignore.py * Revert "Refactor with OutOfWorkSpaceError" This reverts commit 27eec49. * Two change requests 1. Argument `targets`'s description. 2. Error handling of `_get_normalize_path` * Update dvc/main.py Co-authored-by: Ruslan Kuprieiev <[email protected]> * `check_ignore` now only accepts one path at a time 1. Add a new test for the outside-repo cases 2. check ignore now only checks one file, not file lists Co-authored-by: Ruslan Kuprieiev <[email protected]> Co-authored-by: karajan1001 <[email protected]> Co-authored-by: Jorge Orpinel <[email protected]>
1 parent b823ce4 commit 11d906e

File tree

7 files changed

+293
-24
lines changed

7 files changed

+293
-24
lines changed

dvc/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .command import (
77
add,
88
cache,
9+
check_ignore,
910
checkout,
1011
commit,
1112
completion,
@@ -79,6 +80,7 @@
7980
git_hook,
8081
plots,
8182
experiments,
83+
check_ignore,
8284
]
8385

8486

dvc/command/check_ignore.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import argparse
2+
import logging
3+
4+
from dvc.command import completion
5+
from dvc.command.base import CmdBase, append_doc_link
6+
from dvc.exceptions import DvcException
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
class CmdCheckIgnore(CmdBase):
    """Command object behind `dvc check-ignore`.

    Asks the repo's ignore filter about each requested target and logs the
    ones that should be shown.
    """

    def __init__(self, args):
        super().__init__(args)
        # Filter object that answers the per-target ignore queries.
        self.ignore_filter = self.repo.tree.dvcignore

    def _show_results(self, result):
        """Log a single check result, honouring --details / --non-matching."""
        if not (result.match or self.args.non_matching):
            return

        if not self.args.details:
            logger.info(result.file)
            return

        # With --details, the last reported pattern is printed before the
        # path, separated by a tab.
        logger.info(f"{result.patterns[-1]}\t{result.file}")

    def run(self):
        """Check every target; return 0 if any of them matched, else 1.

        Raises:
            DvcException: on an invalid combination of command line flags.
        """
        options = self.args

        if options.non_matching and not options.details:
            raise DvcException("--non-matching is only valid with --details")

        if options.quiet and options.details:
            raise DvcException("cannot both --details and --quiet")

        matched_any = False
        for target in options.targets:
            outcome = self.ignore_filter.check_ignore(target)
            self._show_results(outcome)
            if outcome.match:
                matched_any = True

        return 0 if matched_any else 1
37+
38+
39+
def add_parser(subparsers, parent_parser):
    """Register the `check-ignore` subcommand and its arguments.

    Args:
        subparsers: the argparse sub-parser collection to add the command to.
        parent_parser: parser whose options the new command inherits.
    """
    CHECK_IGNORE_HELP = "Debug DVC ignore/exclude files"

    parser = subparsers.add_parser(
        "check-ignore",
        parents=[parent_parser],
        description=append_doc_link(CHECK_IGNORE_HELP, "check-ignore"),
        help=CHECK_IGNORE_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-d",
        "--details",
        action="store_true",
        default=False,
        help="Show the exclude pattern together with each target path.",
    )
    parser.add_argument(
        "-n",
        "--non-matching",
        action="store_true",
        default=False,
        # Plain ASCII apostrophe: the typographic one cannot be encoded by
        # every console code page when the help text is printed.
        help="Show the target paths which don't match any pattern. "
        "Only usable when `--details` is also employed",
    )
    parser.add_argument(
        "targets",
        nargs="+",
        help="Exact or wildcard paths of files or directories to check "
        "ignore patterns.",
    ).complete = completion.FILE
    parser.set_defaults(func=CmdCheckIgnore)

dvc/ignore.py

Lines changed: 83 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import logging
22
import os
33
import re
4+
from collections import namedtuple
45
from itertools import groupby, takewhile
56

67
from pathspec.patterns import GitWildMatchPattern
78
from pathspec.util import normalize_file
89
from pygtrie import StringTrie
910

1011
from dvc.path_info import PathInfo
11-
from dvc.pathspec_math import merge_patterns
12+
from dvc.pathspec_math import PatternInfo, merge_patterns
1213
from dvc.system import System
1314
from dvc.utils import relpath
1415

@@ -24,28 +25,46 @@ def __call__(self, root, dirs, files):
2425

2526
class DvcIgnorePatterns(DvcIgnore):
2627
def __init__(self, pattern_list, dirname):
    """Build the matcher for the patterns that apply under `dirname`.

    Args:
        pattern_list: `PatternInfo` entries, or plain pattern strings
            (decided by looking at the first element); plain strings are
            wrapped with an empty `file_info`.
        dirname: directory the patterns are relative to.
    """
    if pattern_list and isinstance(pattern_list[0], str):
        pattern_list = [PatternInfo(text, "") for text in pattern_list]

    self.pattern_list = pattern_list
    self.dirname = dirname
    self.prefix = self.dirname + os.sep

    # One (regex, include-flag) entry per pattern, in pattern order.
    self.regex_pattern_list = [
        GitWildMatchPattern.pattern_to_regex(info.patterns)
        for info in pattern_list
    ]

    # Consecutive patterns sharing an include flag are merged into a single
    # compiled alternation; entries whose flag is None are dropped.
    spec = []
    for include, entries in groupby(
        self.regex_pattern_list, lambda entry: entry[1]
    ):
        if include is None:
            continue
        combined = "|".join(entry[0] for entry in entries)
        spec.append((include, re.compile(combined)))
    self.ignore_spec = spec

4251
@classmethod
def from_files(cls, ignore_file_path, tree):
    """Create an instance from the ignore file at `ignore_file_path`.

    Every non-empty (stripped) line becomes a `PatternInfo` whose
    `file_info` is "<path relative to tree.tree_root>:<1-based line>:<line>".

    Args:
        ignore_file_path: absolute path of the ignore file.
        tree: object providing `open()` and `tree_root`.
    """
    assert os.path.isabs(ignore_file_path)
    dirname = os.path.normpath(os.path.dirname(ignore_file_path))
    rel_ignore_file = os.path.relpath(ignore_file_path, tree.tree_root)

    path_spec_lines = []
    with tree.open(ignore_file_path, encoding="utf-8") as fobj:
        # Line numbers count blank lines too, so they match the file.
        for line_no, raw_line in enumerate(fobj.readlines(), start=1):
            line = raw_line.strip()
            if not line:
                continue
            path_spec_lines.append(
                PatternInfo(line, f"{rel_ignore_file}:{line_no}:{line}")
            )

    return cls(path_spec_lines, dirname)
@@ -56,7 +75,7 @@ def __call__(self, root, dirs, files):
5675

5776
return dirs, files
5877

59-
def matches(self, dirname, basename, is_dir=False):
78+
def _get_normalize_path(self, dirname, basename):
6079
# NOTE: `relpath` is too slow, so we have to assume that both
6180
# `dirname` and `self.dirname` are relative or absolute together.
6281
if dirname == self.dirname:
@@ -70,6 +89,12 @@ def matches(self, dirname, basename, is_dir=False):
7089

7190
if not System.is_unix():
7291
path = normalize_file(path)
92+
return path
93+
94+
def matches(self, dirname, basename, is_dir=False):
    """Tell whether `basename` inside `dirname` is ignored.

    The location is first converted with `_get_normalize_path`; when that
    yields nothing, the entry is reported as not ignored.
    """
    normalized = self._get_normalize_path(dirname, basename)
    return self.ignore(normalized, is_dir) if normalized else False
7499

75100
def ignore(self, path, is_dir):
@@ -85,20 +110,48 @@ def ignore(self, path, is_dir):
85110
result = ignore
86111
return result
87112

113+
def match_details(self, dirname, basename, is_dir=False):
    """Return the origin info of the patterns matching the given entry.

    Args:
        dirname: directory containing the entry.
        basename: name of the entry inside `dirname`.
        is_dir: whether the entry is a directory.

    Returns:
        A list of `file_info` strings, as produced by `_ignore_details`;
        empty when `_get_normalize_path` yields no path or nothing matches.
    """
    path = self._get_normalize_path(dirname, basename)
    if not path:
        # Always hand back a list: callers only test truthiness, and an
        # empty list keeps the return type uniform.
        return []
    return self._ignore_details(path, is_dir)
118+
119+
def _ignore_details(self, path, is_dir):
    """Collect the `file_info` of every user pattern whose regex matches.

    Args:
        path: normalized path to test.
        is_dir: when true, the path is also tried with a trailing "/".

    Returns:
        List of `file_info` strings, in pattern order.
    """
    candidates = [path, f"{path}/"] if is_dir else [path]

    result = []
    for regex_entry, pattern in zip(
        self.regex_pattern_list, self.pattern_list
    ):
        # skip system pattern
        if not pattern.file_info:
            continue

        regex_text = regex_entry[0]
        # Some entries carry no regex at all (presumably comment lines in
        # the ignore file -- the constructor filters the same entries out
        # of `ignore_spec`); `re.compile(None)` would raise TypeError.
        if regex_text is None:
            continue

        regex = re.compile(regex_text)
        if any(regex.match(candidate) for candidate in candidates):
            result.append(pattern.file_info)
    return result
134+
88135
def __hash__(self):
    """Hash on the directory and the pattern texts only.

    `__eq__` compares `dirname` and each entry's `.patterns` but not its
    `file_info`, so the hash must ignore `file_info` as well; otherwise two
    equal objects could end up with different hashes.
    """
    pattern_texts = "\n".join(info.patterns for info in self.pattern_list)
    return hash(self.dirname + ":" + pattern_texts)
90137

91138
def __eq__(self, other):
    """Equal when both the directory and the ordered pattern texts agree.

    Only each entry's `.patterns` is compared; `file_info` is ignored.
    Returns `NotImplemented` for objects of other types.
    """
    if not isinstance(other, DvcIgnorePatterns):
        return NotImplemented
    # Cheap comparison first; the pattern lists are only built when the
    # directories already match.
    if self.dirname != other.dirname:
        return False
    own_patterns = [info.patterns for info in self.pattern_list]
    other_patterns = [info.patterns for info in other.pattern_list]
    return own_patterns == other_patterns
97145

98146
def __bool__(self):
99147
return bool(self.pattern_list)
100148

101149

150+
CheckIgnoreResult = namedtuple(
151+
"CheckIgnoreResult", ["file", "match", "patterns"]
152+
)
153+
154+
102155
class DvcIgnoreFilterNoop:
103156
def __init__(self, tree, root_dir):
104157
pass
@@ -112,6 +165,9 @@ def is_ignored_dir(self, _):
112165
def is_ignored_file(self, _):
113166
return False
114167

168+
def check_ignore(self, _):
    """Report the target as not ignored.

    Returns a `CheckIgnoreResult` shaped like the "no match" result of
    `DvcIgnoreFilter.check_ignore`, so callers can read `.file`, `.match`
    and `.patterns` regardless of which filter is in use.
    """
    return CheckIgnoreResult(_, False, ["::"])
170+
115171

116172
class DvcIgnoreFilter:
117173
@staticmethod
@@ -166,20 +222,19 @@ def _update(self, dirname):
166222
def _update_sub_repo(self, root, dirs):
    """Add an ignore pattern for every entry of `dirs` that is a DVC repo.

    Each such directory `d` gets the pattern "/d/" rooted at `root`. If the
    trie already holds patterns on the longest prefix of `root`, the new
    pattern is merged with them; otherwise it is stored as-is.
    """
    for subdir in dirs:
        if not self._is_dvc_repo(root, subdir):
            continue

        new_pattern = DvcIgnorePatterns([f"/{subdir}/"], root)
        old_pattern = self.ignores_trie_tree.longest_prefix(root).value
        if not old_pattern:
            self.ignores_trie_tree[root] = new_pattern
            continue

        merged = merge_patterns(
            old_pattern.pattern_list,
            old_pattern.dirname,
            new_pattern.pattern_list,
            new_pattern.dirname,
        )
        self.ignores_trie_tree[root] = DvcIgnorePatterns(*merged)
183238

184239
def __call__(self, root, dirs, files):
185240
ignore_pattern = self._get_trie_pattern(root)
@@ -245,3 +300,17 @@ def _outside_repo(self, path):
245300
):
246301
return True
247302
return False
303+
304+
def check_ignore(self, target):
    """Check `target` against the ignore patterns that apply to it.

    Returns:
        `CheckIgnoreResult(target, True, matches)` when at least one pattern
        matched, otherwise `CheckIgnoreResult(target, False, ["::"])` --
        also used for targets outside the repo or without any patterns.
    """
    abs_target = os.path.abspath(target)
    if self._outside_repo(abs_target):
        return CheckIgnoreResult(target, False, ["::"])

    parent, name = os.path.split(os.path.normpath(abs_target))
    trie_pattern = self._get_trie_pattern(parent)
    if not trie_pattern:
        return CheckIgnoreResult(target, False, ["::"])

    details = trie_pattern.match_details(
        parent, name, os.path.isdir(abs_target)
    )
    if not details:
        return CheckIgnoreResult(target, False, ["::"])

    return CheckIgnoreResult(target, True, details)

dvc/pathspec_math.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33
# of two path specification patterns with different base
44
# All the operations follow the documents of `gitignore`
55
import os
6+
from collections import namedtuple
67

78
from pathspec.util import normalize_file
89

910
from dvc.utils import relpath
1011

12+
PatternInfo = namedtuple("PatternInfo", ["patterns", "file_info"])
13+
1114

1215
def _not_ignore(rule):
    """Split a leading "!" off `rule`.

    Returns:
        `(True, rule_without_bang)` when the rule starts with "!",
        otherwise `(False, rule)`.
    """
    if rule.startswith("!"):
        return True, rule[1:]
    return False, rule
@@ -59,7 +62,10 @@ def _change_dirname(dirname, pattern_list, new_dirname):
5962
if rel.startswith(".."):
6063
raise ValueError("change dirname can only change to parent path")
6164

62-
return [change_rule(rule, rel) for rule in pattern_list]
65+
return [
66+
PatternInfo(change_rule(rule.patterns, rel), rule.file_info)
67+
for rule in pattern_list
68+
]
6369

6470

6571
def merge_patterns(pattern_a, prefix_a, pattern_b, prefix_b):

0 commit comments

Comments
 (0)