Skip to content

Commit 253a0ea

Browse files
authored
Add more tests according to gitignore (#4166)
* Add more tests according to .gitignore * Test update * Update test * Solve question of path * update one test * Update 2 tests * Add one tests * comment those failed tests caused by other package * parametrize some tests Co-authored-by: karajan1001 <[email protected]>
1 parent 8964967 commit 253a0ea

File tree

3 files changed

+195
-48
lines changed

3 files changed

+195
-48
lines changed

dvc/ignore.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ def __init__(self, ignore_file_path, tree):
4444

4545
def __call__(self, root, dirs, files):
4646
files = [f for f in files if not self.matches(root, f)]
47-
dirs = [d for d in dirs if not self.matches(root, d)]
47+
dirs = [d for d in dirs if not self.matches(root, d, True)]
4848

4949
return dirs, files
5050

51-
def matches(self, dirname, basename):
51+
def matches(self, dirname, basename, is_dir=False):
5252
# NOTE: `relpath` is too slow, so we have to assume that both
5353
# `dirname` and `self.dirname` are relative or absolute together.
5454
prefix = self.dirname + os.sep
@@ -63,13 +63,19 @@ def matches(self, dirname, basename):
6363

6464
if not System.is_unix():
6565
path = normalize_file(path)
66-
return self.ignore(path)
66+
return self.ignore(path, is_dir)
6767

68-
def ignore(self, path):
68+
def ignore(self, path, is_dir):
6969
result = False
70-
for ignore, pattern in self.ignore_spec:
71-
if pattern.match(path):
72-
result = ignore
70+
if is_dir:
71+
path_dir = f"{path}/"
72+
for ignore, pattern in self.ignore_spec:
73+
if pattern.match(path) or pattern.match(path_dir):
74+
result = ignore
75+
else:
76+
for ignore, pattern in self.ignore_spec:
77+
if pattern.match(path):
78+
result = ignore
7379
return result
7480

7581
def __hash__(self):

tests/func/test_ignore.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,66 @@ def test_ignore_blank_line(tmp_dir, dvc):
173173
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored")
174174

175175
assert _files_set("dir", dvc.tree) == {"dir/other"}
176+
177+
178+
# It is not possible to re-include a file if a parent directory of
179+
# that file is excluded.
180+
# Git doesn’t list excluded directories for performance reasons,
181+
# so any patterns on contained files have no effect,
182+
# no matter where they are defined.
183+
@pytest.mark.parametrize(
184+
"data_struct, pattern_list, result_set",
185+
[
186+
(
187+
{"dir": {"subdir": {"not_ignore": "121"}}},
188+
["subdir/*", "!not_ignore"],
189+
{"dir/subdir/not_ignore"},
190+
),
191+
(
192+
{"dir": {"subdir": {"should_ignore": "121"}}},
193+
["subdir", "!should_ignore"],
194+
set(),
195+
),
196+
(
197+
{"dir": {"subdir": {"should_ignore": "121"}}},
198+
["subdir/", "!should_ignore"],
199+
set(),
200+
),
201+
],
202+
)
203+
def test_ignore_file_in_parent_path(
204+
tmp_dir, dvc, data_struct, pattern_list, result_set
205+
):
206+
tmp_dir.gen(data_struct)
207+
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "\n".join(pattern_list))
208+
assert _files_set("dir", dvc.tree) == result_set
209+
210+
211+
# If there is a separator at the end of the pattern then the pattern
212+
# will only match directories,
213+
# otherwise the pattern can match both files and directories.
214+
# For example, a pattern doc/frotz/ matches doc/frotz directory,
215+
# but not a/doc/frotz directory;
216+
def test_ignore_sub_directory(tmp_dir, dvc):
217+
tmp_dir.gen(
218+
{
219+
"dir": {
220+
"doc": {"fortz": {"b": "b"}},
221+
"a": {"doc": {"fortz": {"a": "a"}}},
222+
}
223+
}
224+
)
225+
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "doc/fortz"}})
226+
assert _files_set("dir", dvc.tree) == {
227+
"dir/a/doc/fortz/a",
228+
"dir/{}".format(DvcIgnore.DVCIGNORE_FILE),
229+
}
230+
231+
232+
# however frotz/ matches frotz and a/frotz that is a directory
233+
def test_ignore_directory(tmp_dir, dvc):
234+
tmp_dir.gen({"dir": {"fortz": {}, "a": {"fortz": {}}}})
235+
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "fortz"}})
236+
assert _files_set("dir", dvc.tree) == {
237+
"dir/{}".format(DvcIgnore.DVCIGNORE_FILE),
238+
}

tests/unit/test_ignore.py

Lines changed: 119 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,51 @@ def mock_dvcignore(dvcignore_path, patterns):
1414
return ignore_patterns
1515

1616

17-
def test_ignore_from_file_should_filter_dirs_and_files():
18-
dvcignore_path = os.path.join(
19-
os.path.sep, "full", "path", "to", "ignore", "file", ".dvcignore"
20-
)
21-
22-
patterns = ["dir_to_ignore", "file_to_ignore"]
23-
24-
root = os.path.dirname(dvcignore_path)
25-
dirs = ["dir1", "dir2", "dir_to_ignore"]
26-
files = ["file1", "file2", "file_to_ignore"]
27-
28-
ignore = mock_dvcignore(dvcignore_path, patterns)
29-
new_dirs, new_files = ignore(root, dirs, files)
30-
31-
assert {"dir1", "dir2"} == set(new_dirs)
32-
assert {"file1", "file2"} == set(new_files)
33-
34-
3517
@pytest.mark.parametrize(
3618
"file_to_ignore_relpath, patterns, expected_match",
3719
[
20+
# all rules from https://git-scm.com/docs/gitignore
3821
("to_ignore", ["to_ignore"], True),
22+
("dont_ignore.txt", ["dont_ignore"], False),
23+
# A blank line matches no files, so it can serve as a separator for
24+
# readability.
25+
("to_ignore", ["", "to_ignore"], True),
26+
# A line starting with # serves as a comment.
27+
# Put a backslash ("\") in front of the first hash for patterns
28+
# that begin with a hash.
29+
("#to_ignore", ["\\#to_ignore"], True),
30+
("#to_ignore", ["#to_ignore"], False),
31+
# Trailing spaces are ignored unless they are quoted with
32+
# backslash ("\"). NOTE: the cases below use leading, not trailing, spaces.
33+
(" to_ignore", [" to_ignore"], False),
34+
(" to_ignore", ["\\ to_ignore"], True),
35+
# An optional prefix "!" which negates the pattern; any matching file
36+
# excluded by a previous pattern will become included again.
3937
("to_ignore.txt", ["to_ignore*"], True),
40-
(
41-
os.path.join("rel", "p", "p2", "to_ignore"),
42-
["rel/**/to_ignore"],
43-
True,
44-
),
38+
("to_ignore.txt", ["to_ignore*", "!to_ignore.txt"], False),
39+
("to_ignore.txt", ["!to_ignore.txt", "to_ignore*"], True),
40+
# It is not possible to re-include a file if a parent directory of
41+
# that file is excluded.
42+
# Git doesn’t list excluded directories for performance reasons,
43+
# so any patterns on contained files have no effect,
44+
# no matter where they are defined.
45+
# see (`tests/func/test_ignore.py::test_ignore_file_in_parent_path`)
46+
# Put a backslash ("\") in front of the first "!"
47+
# for patterns that begin with a literal "!",
48+
# for example, "\!important!.txt".
49+
("!to_ignore.txt", ["\\!to_ignore.txt"], True),
50+
# The slash / is used as the directory separator.
51+
# Separators may occur at the beginning, middle or end of the
52+
# .gitignore search pattern.
53+
# If there is a separator at the beginning or middle (or both)
54+
# of the pattern, then the pattern is relative to the directory
55+
# level of the particular .gitignore file itself.
56+
# Otherwise the pattern may also match at any level below
57+
# the .gitignore level.
58+
("file", ["/file"], True),
59+
(os.path.join("data", "file"), ["/file"], False),
60+
(os.path.join("data", "file"), ["data/file"], True),
61+
(os.path.join("other", "data", "file"), ["data/file"], False),
4562
(
4663
os.path.join(
4764
os.path.sep,
@@ -55,20 +72,96 @@ def test_ignore_from_file_should_filter_dirs_and_files():
5572
["to_ignore"],
5673
True,
5774
),
75+
# If there is a separator at the end of the pattern then the pattern
76+
# will only match directories,
77+
# otherwise the pattern can match both files and directories.
78+
# For example, a pattern doc/frotz/ matches doc/frotz directory,
79+
# but not a/doc/frotz directory;
80+
# see (`tests/func/test_ignore.py::test_ignore_sub_directory`)
81+
# however frotz/ matches frotz and a/frotz that is a directory
82+
# (all paths are relative from the .gitignore file).
83+
# see (`tests/func/test_ignore.py::test_ignore_directory`)
84+
# An asterisk "*" matches anything except a slash.
5885
("to_ignore.txt", ["/*.txt"], True),
86+
(os.path.join("path", "to_ignore.txt"), ["/*.txt"], False),
87+
(os.path.join("data", "file.txt"), ["data/*"], True),
88+
# wait for Git
89+
# (os.path.join("data", "sub", "file.txt"), ["data/*"], True),
5990
(
6091
os.path.join("rel", "path", "path2", "to_ignore"),
6192
["rel/*/to_ignore"],
6293
False,
6394
),
64-
(os.path.join("path", "to_ignore.txt"), ["/*.txt"], False),
95+
("file.txt", ["file.*"], True),
96+
# The character "?" matches any one character except "/".
97+
("file.txt", ["fi?e.t?t"], True),
98+
("fi/e.txt", ["fi?e.t?t"], False),
99+
# The range notation, e.g. [a-zA-Z], can be used
100+
# to match one of the characters in a range. See fnmatch(3) and
101+
# the FNM_PATHNAME flag for a more detailed description.
102+
("file.txt", ["[a-zA-Z]ile.txt"], True),
103+
("2ile.txt", ["[a-zA-Z]ile.txt"], False),
104+
# Two consecutive asterisks ("**") in patterns matched against
105+
# full pathname may have special meaning:
106+
# A leading "**" followed by a slash means match in all directories.
107+
# For example, "**/foo" matches file or directory "foo" anywhere, the
108+
# same as pattern "foo".
109+
# "**/foo/bar" matches file or directory "bar" anywhere that is
110+
# directly under directory "foo".
111+
(os.path.join("rel", "p", "p2", "to_ignore"), ["**/to_ignore"], True,),
112+
(
113+
os.path.join("rel", "p", "p2", "to_ignore"),
114+
["**/p2/to_ignore"],
115+
True,
116+
),
117+
(
118+
os.path.join("rel", "path", "path2", "dont_ignore"),
119+
["**/to_ignore"],
120+
False,
121+
),
122+
# A trailing "/**" matches everything inside.
123+
# For example, "abc/**" matches all files inside directory "abc",
124+
# relative to the location of the .gitignore file, with infinite depth.
125+
(os.path.join("rel", "p", "p2", "to_ignore"), ["rel/**"], True,),
126+
(os.path.join("rel", "p", "p2", "to_ignore"), ["p/**"], False,),
127+
(
128+
os.path.join("rel", "path", "path2", "dont_ignore"),
129+
["rel/**"],
130+
True,
131+
),
132+
# A slash followed by two consecutive asterisks then a slash matches
133+
# zero or more directories.
134+
# For example, "a/**/b" matches "a/b", "a/x/b", "a/x/y/b" and so on.
135+
(os.path.join("rel", "p", "to_ignore"), ["rel/**/to_ignore"], True,),
136+
(
137+
os.path.join("rel", "p", "p2", "to_ignore"),
138+
["rel/**/to_ignore"],
139+
True,
140+
),
65141
(
66142
os.path.join("rel", "path", "path2", "dont_ignore"),
67143
["rel/**/to_ignore"],
68144
False,
69145
),
70-
("dont_ignore.txt", ["dont_ignore"], False),
71-
("dont_ignore.txt", ["dont*", "!dont_ignore.txt"], False),
146+
(
147+
os.path.join("rel", "path", "path2", "dont_ignore"),
148+
["path/**/dont_ignore"],
149+
False,
150+
),
151+
# Other consecutive asterisks are considered regular asterisks
152+
# and will match according to the previous rules.
153+
("to_ignore.txt", ["/***.txt"], True),
154+
(os.path.join("path", "to_ignore.txt"), ["/****.txt"], False),
155+
(os.path.join("path", "to_ignore.txt"), ["****.txt"], True),
156+
(os.path.join("data", "file.txt"), ["data/***"], True),
157+
# bug from PathSpec
158+
# (os.path.join("data", "p", "file.txt"), ["data/***"], False),
159+
(os.path.join("data", "p", "file.txt"), ["***/file.txt"], False),
160+
(
161+
os.path.join("rel", "path", "path2", "to_ignore"),
162+
["rel/***/to_ignore"],
163+
False,
164+
),
72165
],
73166
)
74167
def test_match_ignore_from_file(
@@ -99,18 +192,3 @@ def test_should_ignore_dir(omit_dir):
99192
new_dirs, _ = ignore(root, dirs, files)
100193

101194
assert set(new_dirs) == {"dir1", "dir2"}
102-
103-
104-
def test_ignore_order():
105-
dvcignore_path = os.path.join(os.path.sep, "ignore_order", ".dvcignore")
106-
107-
patterns = ["!ac*", "a*", "!ab*"]
108-
109-
root = os.path.dirname(dvcignore_path)
110-
dirs = ["ignore_order"]
111-
files = ["ac", "ab", "aa"]
112-
113-
ignore = mock_dvcignore(dvcignore_path, patterns)
114-
_, new_files = ignore(root, dirs, files)
115-
116-
assert {"ab"} == set(new_files)

0 commit comments

Comments
 (0)