Skip to content

Commit c69c00c

Browse files
committed
wip: re-implement fnmatch
1 parent 57d8649 commit c69c00c

File tree

3 files changed

+122
-64
lines changed

3 files changed

+122
-64
lines changed

coverage/files.py

+32-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
"""File wrangling."""
55

6-
import fnmatch
76
import hashlib
87
import ntpath
98
import os
@@ -282,7 +281,35 @@ def sep(s):
282281
return the_sep
283282

284283

284+
# Tokenizer table for `pathex`: pairs of (compiled pattern token, replacement
# template for `Match.expand`).  Order matters: at each position `pathex` tries
# the rules top to bottom and uses the first one that matches.
PATHEX_SUBS = [(re.compile(rx), sub) for rx, sub in [
    (r"^\*+[/\\]", r"^(.*[/\\\\])?"),   # leading */ or **/: any prefix ending in a slash, or nothing
    (r"[/\\]\*+$", r"[/\\\\].*"),       # trailing /* or /**: a slash, then anything
    (r"\*\*+[/\\]?", r".*"),            # ** (with optional slash): anything, slashes included
    (r"[/\\]", r"[/\\\\]"),             # / or \: either kind of slash
    (r"\*", r"[^/\\\\]*"),              # *: any run of non-slash characters
    (r"\?", r"[^/\\\\]"),               # ?: exactly one non-slash character
    (r"\[.*?\]", r"\g<0>"),             # [a-f]: bracket expressions pass through unchanged
    (r"[a-zA-Z0-9_-]+", r"\g<0>"),      # plain word characters match themselves
    (r".", r"\\\g<0>"),                 # anything else is backslash-escaped
]]


def pathex(pattern):
    """Convert a file-path pattern into a regex.

    The pattern is consumed left to right: at each position the first rule in
    `PATHEX_SUBS` that matches is expanded into the output regex.  A pattern
    with no slash or backslash at all is first prefixed with "**/".

    Arguments:
        pattern: the file-path pattern string to convert.

    Returns:
        The regex source as a string (not compiled, and with no end anchor).

    """
    if not re.search(r"[/\\]", pattern):
        pattern = "**/" + pattern
    pieces = []
    pos = 0
    while pos < len(pattern):
        for rx, sub in PATHEX_SUBS:
            m = rx.match(pattern, pos=pos)
            if m:
                pieces.append(m.expand(sub))
                pos = m.end()
                break
        else:
            # No rule matched.  The catch-all "." rule does not match a
            # newline, so without this branch `pos` would never advance and
            # the loop would never finish.  Escape the character literally.
            pieces.append(re.escape(pattern[pos]))
            pos += 1
    return "".join(pieces)
310+
285311
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
312+
1 # todo: fix this docstring
286313
"""Convert fnmatch patterns to a compiled regex that matches any of them.
287314
288315
Slashes are always converted to match either slash or backslash, for
@@ -295,24 +322,13 @@ def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
295322
strings.
296323
297324
"""
298-
regexes = (fnmatch.translate(pattern) for pattern in patterns)
299-
# */ at the start should also match nothing.
300-
regexes = (re.sub(r"^\(\?s:\.\*(\\\\|/)", r"(?s:^(.*\1)?", regex) for regex in regexes)
301-
# Be agnostic: / can mean backslash or slash.
302-
regexes = (re.sub(r"/", r"[\\\\/]", regex) for regex in regexes)
303-
304-
if partial:
305-
# fnmatch always adds a \Z to match the whole string, which we don't
306-
# want, so we remove the \Z. While removing it, we only replace \Z if
307-
# followed by paren (introducing flags), or at end, to keep from
308-
# destroying a literal \Z in the pattern.
309-
regexes = (re.sub(r'\\Z(\(\?|$)', r'\1', regex) for regex in regexes)
310-
311325
flags = 0
312326
if case_insensitive:
313327
flags |= re.IGNORECASE
314-
compiled = re.compile(join_regex(regexes), flags=flags)
315-
328+
rx = join_regex(map(pathex, patterns))
329+
if not partial:
330+
rx = rf"(?:{rx})\Z"
331+
compiled = re.compile(rx, flags=flags)
316332
return compiled
317333

318334

tests/test_api.py

-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ def test_unexecuted_file(self):
7171
assert missing == [1]
7272

7373
def test_filenames(self):
74-
7574
self.make_file("mymain.py", """\
7675
import mymod
7776
a = 1

tests/test_files.py

+90-47
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
"""Tests for files.py"""
55

6+
import itertools
67
import os
78
import os.path
89
from unittest import mock
@@ -104,59 +105,99 @@ def test_flat_rootname(original, flat):
104105
assert flat_rootname(original) == flat
105106

106107

108+
def gen_params(patterns, case_insensitive=False, partial=False, matches=(), nomatches=()):
    """Yield one `pytest.param` for each text to try against `patterns`.

    Each param carries (patterns, case_insensitive, partial, text, result),
    where result is True for every text in `matches` and False for every text
    in `nomatches`.  All `matches` are produced before any `nomatches`.  The
    param id spells out the patterns, both flags, the text, and the expected
    outcome.

    """
    id_prefix = "{}:ci{}:par{}".format("|".join(patterns), case_insensitive, partial)
    outcomes = [
        (matches, True, "match"),
        (nomatches, False, "nomatch"),
    ]
    for texts, expected, label in outcomes:
        for text in texts:
            yield pytest.param(
                patterns, case_insensitive, partial, text, expected,
                id=f"{id_prefix}:{text}:{label}",
            )
120+
107121
@pytest.mark.parametrize(
108-
"patterns, case_insensitive, partial," +
109-
"matches," +
110-
"nomatches",
111-
[
112-
(
113-
["abc", "xyz"], False, False,
114-
["abc", "xyz"],
115-
["ABC", "xYz", "abcx", "xabc", "axyz", "xyza"],
122+
"patterns, case_insensitive, partial, text, result",
123+
list(itertools.chain.from_iterable([
124+
gen_params(
125+
["abc", "xyz"],
126+
matches=["abc", "xyz", "sub/mod/abc"],
127+
nomatches=["ABC", "xYz", "abcx", "xabc", "axyz", "xyza", "sub/mod/abcd", "sub/abc/more"],
116128
),
117-
(
118-
["abc", "xyz"], True, False,
119-
["abc", "xyz", "Abc", "XYZ", "AbC"],
120-
["abcx", "xabc", "axyz", "xyza"],
129+
gen_params(
130+
["abc", "xyz"], case_insensitive=True,
131+
matches=["abc", "xyz", "Abc", "XYZ", "AbC"],
132+
nomatches=["abcx", "xabc", "axyz", "xyza"],
121133
),
122-
(
123-
["abc/hi.py"], True, False,
124-
["abc/hi.py", "ABC/hi.py", r"ABC\hi.py"],
125-
["abc_hi.py", "abc/hi.pyc"],
134+
gen_params(
135+
["a?c", "x?z"],
136+
matches=["abc", "xyz", "xYz", "azc", "xaz"],
137+
nomatches=["ABC", "abcx", "xabc", "axyz", "xyza"],
126138
),
127-
(
128-
[r"abc\hi.py"], True, False,
129-
[r"abc\hi.py", r"ABC\hi.py"],
130-
["abc/hi.py", "ABC/hi.py", "abc_hi.py", "abc/hi.pyc"],
139+
gen_params(
140+
["a??d"],
141+
matches=["abcd", "azcd", "a12d"],
142+
nomatches=["ABCD", "abcx", "axyz", "abcde"],
131143
),
132-
(
133-
["abc/*/hi.py"], True, False,
134-
["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
135-
["abc/hi.py", "abc/hi.pyc"],
144+
gen_params(
145+
["abc/hi.py"], case_insensitive=True,
146+
matches=["abc/hi.py", "ABC/hi.py", r"ABC\hi.py"],
147+
nomatches=["abc_hi.py", "abc/hi.pyc"],
136148
),
137-
(
138-
["abc/[a-f]*/hi.py"], True, False,
139-
["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
140-
["abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc"],
149+
gen_params(
150+
[r"abc\hi.py"], case_insensitive=True,
151+
matches=[r"abc\hi.py", r"ABC\hi.py", "abc/hi.py", "ABC/hi.py"],
152+
nomatches=["abc_hi.py", "abc/hi.pyc"],
141153
),
142-
(
143-
["abc/"], True, True,
144-
["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
145-
["abcd/foo.py", "xabc/hi.py"],
154+
gen_params(
155+
["abc/*/hi.py"], case_insensitive=True,
156+
matches=["abc/foo/hi.py", r"ABC\foo/hi.py"],
157+
nomatches=["abc/hi.py", "abc/hi.pyc", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
146158
),
147-
(
148-
["*/foo"], False, True,
149-
["abc/foo/hi.py", "foo/hi.py"],
150-
["abc/xfoo/hi.py"],
159+
gen_params(
160+
["abc/**/hi.py"], case_insensitive=True,
161+
matches=[
162+
"abc/foo/hi.py", r"ABC\foo/hi.py", "abc/hi.py", "ABC/foo/bar/hi.py",
163+
r"ABC\foo/bar/hi.py",
164+
],
165+
nomatches=["abc/hi.pyc"],
151166
),
152-
153-
])
154-
def test_fnmatches_to_regex(patterns, case_insensitive, partial, matches, nomatches):
167+
gen_params(
168+
["abc/[a-f]*/hi.py"], case_insensitive=True,
169+
matches=["abc/foo/hi.py", r"ABC\boo/hi.py"],
170+
nomatches=[
171+
"abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc", "abc/foo/bar/hi.py", r"abc\foo/bar/hi.py",
172+
],
173+
),
174+
gen_params(
175+
["abc/[a-f]/hi.py"], case_insensitive=True,
176+
matches=["abc/f/hi.py", r"ABC\b/hi.py"],
177+
nomatches=[
178+
"abc/foo/hi.py", "abc/zoo/hi.py", "abc/hi.py", "abc/hi.pyc", "abc/foo/bar/hi.py",
179+
r"abc\foo/bar/hi.py",
180+
],
181+
),
182+
gen_params(
183+
["abc/"], case_insensitive=True, partial=True,
184+
matches=["abc/foo/hi.py", "ABC/foo/bar/hi.py", r"ABC\foo/bar/hi.py"],
185+
nomatches=["abcd/foo.py", "xabc/hi.py"],
186+
),
187+
gen_params(
188+
["*/foo"], case_insensitive=False, partial=True,
189+
matches=["abc/foo/hi.py", "foo/hi.py"],
190+
nomatches=["abc/xfoo/hi.py"],
191+
),
192+
gen_params(
193+
["**/foo"],
194+
matches=["foo", "hello/foo", "hi/there/foo"],
195+
nomatches=["foob", "hello/foob", "hello/Foo"],
196+
),
197+
])))
198+
def test_fnmatches_to_regex(patterns, case_insensitive, partial, text, result):
155199
regex = fnmatches_to_regex(patterns, case_insensitive=case_insensitive, partial=partial)
156-
for s in matches:
157-
assert regex.match(s)
158-
for s in nomatches:
159-
assert not regex.match(s)
200+
assert bool(regex.match(text)) == result
160201

161202

162203
class MatcherTest(CoverageTest):
@@ -235,6 +276,8 @@ def test_fnmatch_matcher_overload(self):
235276
self.assertMatches(fnm, "x007foo.txt", True)
236277
self.assertMatches(fnm, "x123foo.txt", True)
237278
self.assertMatches(fnm, "x798bar.txt", False)
279+
self.assertMatches(fnm, "x499.txt", True)
280+
self.assertMatches(fnm, "x500.txt", False)
238281

239282
def test_fnmatch_windows_paths(self):
240283
# We should be able to match Windows paths even if we are running on
@@ -309,9 +352,9 @@ def test_multiple_patterns(self, rel_yn):
309352
assert msgs == [
310353
"Aliases (relative=True):",
311354
" Rule: '/home/*/src' -> './mysrc/' using regex " +
312-
"'(?s:[\\\\\\\\/]home[\\\\\\\\/].*[\\\\\\\\/]src[\\\\\\\\/])'",
355+
"'[/\\\\\\\\]home[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]src[/\\\\\\\\]'",
313356
" Rule: '/lib/*/libsrc' -> './mylib/' using regex " +
314-
"'(?s:[\\\\\\\\/]lib[\\\\\\\\/].*[\\\\\\\\/]libsrc[\\\\\\\\/])'",
357+
"'[/\\\\\\\\]lib[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]libsrc[/\\\\\\\\]'",
315358
"Matched path '/home/foo/src/a.py' to rule '/home/*/src' -> './mysrc/', " +
316359
"producing './mysrc/a.py'",
317360
"Matched path '/lib/foo/libsrc/a.py' to rule '/lib/*/libsrc' -> './mylib/', " +
@@ -321,9 +364,9 @@ def test_multiple_patterns(self, rel_yn):
321364
assert msgs == [
322365
"Aliases (relative=False):",
323366
" Rule: '/home/*/src' -> './mysrc/' using regex " +
324-
"'(?s:[\\\\\\\\/]home[\\\\\\\\/].*[\\\\\\\\/]src[\\\\\\\\/])'",
367+
"'[/\\\\\\\\]home[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]src[/\\\\\\\\]'",
325368
" Rule: '/lib/*/libsrc' -> './mylib/' using regex " +
326-
"'(?s:[\\\\\\\\/]lib[\\\\\\\\/].*[\\\\\\\\/]libsrc[\\\\\\\\/])'",
369+
"'[/\\\\\\\\]lib[/\\\\\\\\][^/\\\\\\\\]*[/\\\\\\\\]libsrc[/\\\\\\\\]'",
327370
"Matched path '/home/foo/src/a.py' to rule '/home/*/src' -> './mysrc/', " +
328371
f"producing {files.canonical_filename('./mysrc/a.py')!r}",
329372
"Matched path '/lib/foo/libsrc/a.py' to rule '/lib/*/libsrc' -> './mylib/', " +

0 commit comments

Comments
 (0)