Skip to content

Commit a59f90f

Browse files
authored
Performance improvement of dvcignore (#3967)
Fix #3869. 1. Use one big regex. * Solve Windows * Add rule order test * Solve ignore order * Remove list comprehensions
1 parent bba5023 commit a59f90f

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

dvc/ignore.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import logging
22
import os
3+
import re
4+
from itertools import groupby
35

46
from funcy import cached_property
5-
from pathspec import PathSpec
67
from pathspec.patterns import GitWildMatchPattern
8+
from pathspec.util import normalize_file
79

810
from dvc.path_info import PathInfo
911
from dvc.scm.tree import BaseTree
12+
from dvc.system import System
1013
from dvc.utils import relpath
1114

1215
logger = logging.getLogger(__name__)
@@ -27,7 +30,16 @@ def __init__(self, ignore_file_path, tree):
2730
self.dirname = os.path.normpath(os.path.dirname(ignore_file_path))
2831

2932
with tree.open(ignore_file_path, encoding="utf-8") as fobj:
30-
self.ignore_spec = PathSpec.from_lines(GitWildMatchPattern, fobj)
33+
path_spec_lines = fobj.readlines()
34+
regex_pattern_list = map(
35+
GitWildMatchPattern.pattern_to_regex, path_spec_lines
36+
)
37+
self.ignore_spec = [
38+
(ignore, re.compile("|".join(item[0] for item in group)))
39+
for ignore, group in groupby(
40+
regex_pattern_list, lambda x: x[1]
41+
)
42+
]
3143

3244
def __call__(self, root, dirs, files):
3345
files = [f for f in files if not self.matches(root, f)]
@@ -48,7 +60,16 @@ def matches(self, dirname, basename):
4860
else:
4961
return False
5062

51-
return self.ignore_spec.match_file(path)
63+
if not System.is_unix():
64+
path = normalize_file(path)
65+
return self.ignore(path)
66+
67+
def ignore(self, path):
68+
result = False
69+
for ignore, pattern in self.ignore_spec:
70+
if pattern.match(path):
71+
result = ignore
72+
return result
5273

5374
def __hash__(self):
5475
return hash(self.ignore_file_path)

tests/unit/test_ignore.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,18 @@ def test_should_ignore_dir(omit_dir):
9999
new_dirs, _ = ignore(root, dirs, files)
100100

101101
assert set(new_dirs) == {"dir1", "dir2"}
102+
103+
104+
def test_ignore_order():
105+
dvcignore_path = os.path.join(os.path.sep, "ignore_order", ".dvcignore")
106+
107+
patterns = ["!ac*", "a*", "!ab*"]
108+
109+
root = os.path.dirname(dvcignore_path)
110+
dirs = ["ignore_order"]
111+
files = ["ac", "ab", "aa"]
112+
113+
ignore = mock_dvcignore(dvcignore_path, patterns)
114+
_, new_files = ignore(root, dirs, files)
115+
116+
assert {"ab"} == set(new_files)

0 commit comments

Comments
 (0)