1
1
import logging
2
2
import os
3
3
import re
4
+ from collections import namedtuple
4
5
from itertools import groupby , takewhile
5
6
6
7
from pathspec .patterns import GitWildMatchPattern
7
8
from pathspec .util import normalize_file
8
9
from pygtrie import StringTrie
9
10
10
11
from dvc .path_info import PathInfo
11
- from dvc .pathspec_math import merge_patterns
12
+ from dvc .pathspec_math import PatternInfo , merge_patterns
12
13
from dvc .system import System
13
14
from dvc .utils import relpath
14
15
@@ -24,28 +25,46 @@ def __call__(self, root, dirs, files):
24
25
25
26
class DvcIgnorePatterns (DvcIgnore ):
26
27
def __init__(self, pattern_list, dirname):
    """Build the ignore matcher for the patterns rooted at ``dirname``.

    Args:
        pattern_list: patterns to apply, either as ``PatternInfo`` items
            or as plain strings. Plain strings are detected by looking
            at the first element only and are wrapped into
            ``PatternInfo`` objects with an empty ``file_info``.
        dirname: directory the patterns are relative to.
    """
    if pattern_list and isinstance(pattern_list[0], str):
        pattern_list = [PatternInfo(raw, "") for raw in pattern_list]

    self.pattern_list = pattern_list
    self.dirname = dirname
    self.prefix = self.dirname + os.sep

    # One ``(regex, include)`` pair per pattern, kept in the same order as
    # ``pattern_list`` so the two lists can be zipped together later.
    self.regex_pattern_list = [
        GitWildMatchPattern.pattern_to_regex(info.patterns)
        for info in pattern_list
    ]

    # Consecutive patterns sharing the same include flag are merged into a
    # single alternation; runs whose flag is ``None`` are dropped.
    ignore_spec = []
    for include, members in groupby(
        self.regex_pattern_list, lambda pair: pair[1]
    ):
        if include is None:
            continue
        combined = re.compile("|".join(member[0] for member in members))
        ignore_spec.append((include, combined))
    self.ignore_spec = ignore_spec
41
50
42
51
@classmethod
def from_files(cls, ignore_file_path, tree):
    """Create an instance from the ignore file at ``ignore_file_path``.

    Every non-blank (after stripping) line becomes a ``PatternInfo`` whose
    second field is ``"<path relative to tree.tree_root>:<line no>:<line>"``
    with 1-based line numbers counting blank lines as well.

    Args:
        ignore_file_path: absolute path of the ignore file.
        tree: object providing ``open()`` and ``tree_root``.
    """
    assert os.path.isabs(ignore_file_path)
    dirname = os.path.normpath(os.path.dirname(ignore_file_path))
    ignore_file_rel_path = os.path.relpath(ignore_file_path, tree.tree_root)

    path_spec_lines = []
    with tree.open(ignore_file_path, encoding="utf-8") as fobj:
        stripped_lines = map(str.strip, fobj.readlines())
        for line_no, line in enumerate(stripped_lines, start=1):
            if not line:
                continue
            origin = "{}:{}:{}".format(ignore_file_rel_path, line_no, line)
            path_spec_lines.append(PatternInfo(line, origin))

    return cls(path_spec_lines, dirname)
@@ -56,7 +75,7 @@ def __call__(self, root, dirs, files):
56
75
57
76
return dirs , files
58
77
59
- def matches (self , dirname , basename , is_dir = False ):
78
+ def _get_normalize_path (self , dirname , basename ):
60
79
# NOTE: `relpath` is too slow, so we have to assume that both
61
80
# `dirname` and `self.dirname` are relative or absolute together.
62
81
if dirname == self .dirname :
@@ -70,6 +89,12 @@ def matches(self, dirname, basename, is_dir=False):
70
89
71
90
if not System .is_unix ():
72
91
path = normalize_file (path )
92
+ return path
93
+
94
def matches(self, dirname, basename, is_dir=False):
    """Tell whether ``basename`` inside ``dirname`` is ignored.

    Returns ``False`` when the normalized path is empty; otherwise
    returns whatever ``self.ignore`` reports for the normalized path.
    """
    normalized = self._get_normalize_path(dirname, basename)
    return self.ignore(normalized, is_dir) if normalized else False
74
99
75
100
def ignore (self , path , is_dir ):
@@ -85,20 +110,48 @@ def ignore(self, path, is_dir):
85
110
result = ignore
86
111
return result
87
112
113
def match_details(self, dirname, basename, is_dir=False):
    """Return the origins of the patterns matching ``dirname/basename``.

    Args:
        dirname: directory containing the entry.
        basename: name of the entry inside ``dirname``.
        is_dir: whether the entry is a directory.

    Returns:
        The list produced by ``_ignore_details`` for the normalized path.
        An empty list (rather than ``False``) is returned when the path
        normalizes to nothing, so callers always receive a list.
    """
    path = self._get_normalize_path(dirname, basename)
    if not path:
        return []
    return self._ignore_details(path, is_dir)
118
+
119
+ def _ignore_details (self , path , is_dir ):
120
+ result = []
121
+ for ignore , pattern in zip (self .regex_pattern_list , self .pattern_list ):
122
+ regex = re .compile (ignore [0 ])
123
+ # skip system pattern
124
+ if not pattern .file_info :
125
+ continue
126
+ if is_dir :
127
+ path_dir = f"{ path } /"
128
+ if regex .match (path ) or regex .match (path_dir ):
129
+ result .append (pattern .file_info )
130
+ else :
131
+ if regex .match (path ):
132
+ result .append (pattern .file_info )
133
+ return result
134
+
88
135
def __hash__ (self ):
89
- return hash (self .dirname + ":" + " \n " . join (self .pattern_list ))
136
+ return hash (self .dirname + ":" + str (self .pattern_list ))
90
137
91
138
def __eq__(self, other):
    """Compare by ``dirname`` and by the ``patterns`` text of each entry.

    The ``file_info`` of the entries does not take part in the comparison.
    Returns ``NotImplemented`` for objects that are not
    ``DvcIgnorePatterns``.
    """
    if not isinstance(other, DvcIgnorePatterns):
        return NotImplemented
    if self.dirname != other.dirname:
        return False
    own_patterns = [info.patterns for info in self.pattern_list]
    their_patterns = [info.patterns for info in other.pattern_list]
    return own_patterns == their_patterns
97
145
98
146
def __bool__ (self ):
99
147
return bool (self .pattern_list )
100
148
101
149
150
# Outcome of checking a single target against the ignore patterns:
#   file     - the target as it was given
#   match    - whether any pattern matched
#   patterns - list describing the matching patterns
CheckIgnoreResult = namedtuple("CheckIgnoreResult", "file match patterns")
153
+
154
+
102
155
class DvcIgnoreFilterNoop :
103
156
def __init__ (self , tree , root_dir ):
104
157
pass
@@ -112,6 +165,9 @@ def is_ignored_dir(self, _):
112
165
def is_ignored_file(self, _):
    """Report that the given file is not ignored, whatever it is."""
    return False
114
167
168
def check_ignore(self, _):
    """Return an empty list: the no-op filter never matches any pattern."""
    no_matches = []
    return no_matches
170
+
115
171
116
172
class DvcIgnoreFilter :
117
173
@staticmethod
@@ -166,20 +222,19 @@ def _update(self, dirname):
166
222
def _update_sub_repo(self, root, dirs):
    """Register an ignore pattern for every entry of ``dirs`` that is a DVC repo.

    For each such directory ``d`` a ``/d/`` pattern rooted at ``root`` is
    created. If the trie already yields patterns for the longest prefix of
    ``root``, the new pattern is merged with them; otherwise it is stored
    as is. Either way the result is stored under ``root``.
    """
    for d in dirs:
        if not self._is_dvc_repo(root, d):
            continue

        new_pattern = DvcIgnorePatterns(["/{}/".format(d)], root)
        old_pattern = self.ignores_trie_tree.longest_prefix(root).value

        if not old_pattern:
            self.ignores_trie_tree[root] = new_pattern
            continue

        merged = merge_patterns(
            old_pattern.pattern_list,
            old_pattern.dirname,
            new_pattern.pattern_list,
            new_pattern.dirname,
        )
        self.ignores_trie_tree[root] = DvcIgnorePatterns(*merged)
183
238
184
239
def __call__ (self , root , dirs , files ):
185
240
ignore_pattern = self ._get_trie_pattern (root )
@@ -245,3 +300,17 @@ def _outside_repo(self, path):
245
300
):
246
301
return True
247
302
return False
303
+
304
def check_ignore(self, target):
    """Explain whether ``target`` is ignored and by which patterns.

    Args:
        target: path to check; it is made absolute before matching.

    Returns:
        ``CheckIgnoreResult(target, True, matches)`` when the target lies
        inside the repo and at least one pattern matches it, otherwise
        ``CheckIgnoreResult(target, False, ["::"])``.
    """
    full_target = os.path.abspath(target)

    matches = None
    if not self._outside_repo(full_target):
        dirname, basename = os.path.split(os.path.normpath(full_target))
        pattern = self._get_trie_pattern(dirname)
        if pattern:
            matches = pattern.match_details(
                dirname, basename, os.path.isdir(full_target)
            )

    if matches:
        return CheckIgnoreResult(target, True, matches)
    return CheckIgnoreResult(target, False, ["::"])
0 commit comments