6
6
"""
7
7
8
8
import contextlib
9
- import fnmatch
10
9
import functools
10
+ import glob
11
11
import io
12
12
import ntpath
13
13
import os
@@ -76,78 +76,16 @@ def _is_case_sensitive(pathmod):
76
76
#
77
77
78
78
79
- # fnmatch.translate() returns a regular expression that includes a prefix and
80
- # a suffix, which enable matching newlines and ensure the end of the string is
81
- # matched, respectively. These features are undesirable for our implementation
82
- # of PurePath.match(), which represents path separators as newlines and joins
83
- # pattern segments together. As a workaround, we define a slice object that
84
- # can remove the prefix and suffix from any translate() result. See the
85
- # _compile_pattern_lines() function for more details.
86
- _FNMATCH_PREFIX , _FNMATCH_SUFFIX = fnmatch .translate ('_' ).split ('_' )
87
- _FNMATCH_SLICE = slice (len (_FNMATCH_PREFIX ), - len (_FNMATCH_SUFFIX ))
88
- _SWAP_SEP_AND_NEWLINE = {
89
- '/' : str .maketrans ({'/' : '\n ' , '\n ' : '/' }),
90
- '\\ ' : str .maketrans ({'\\ ' : '\n ' , '\n ' : '\\ ' }),
91
- }
92
-
93
-
94
79
@functools .lru_cache (maxsize = 256 )
95
- def _compile_pattern (pat , case_sensitive ):
80
+ def _compile_pattern (pat , sep , case_sensitive ):
96
81
"""Compile given glob pattern to a re.Pattern object (observing case
97
- sensitivity), or None if the pattern should match everything."""
98
- if pat == '*' :
99
- return None
82
+ sensitivity)."""
100
83
flags = re .NOFLAG if case_sensitive else re .IGNORECASE
101
- return re .compile (fnmatch .translate (pat ), flags ).match
102
-
103
-
104
- @functools .lru_cache ()
105
- def _compile_pattern_lines (pattern_lines , case_sensitive ):
106
- """Compile the given pattern lines to an `re.Pattern` object.
107
-
108
- The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
109
- its path separators and newlines swapped (e.g. '**\n*.py'). By using
110
- newlines to separate path components, and not setting `re.DOTALL`, we
111
- ensure that the `*` wildcard cannot match path separators.
112
-
113
- The returned `re.Pattern` object may have its `match()` method called to
114
- match a complete pattern, or `search()` to match from the right. The
115
- argument supplied to these methods must also have its path separators and
116
- newlines swapped.
117
- """
118
-
119
- # Match the start of the path, or just after a path separator
120
- parts = ['^' ]
121
- for part in pattern_lines .splitlines (keepends = True ):
122
- if part == '*\n ' :
123
- part = r'.+\n'
124
- elif part == '*' :
125
- part = r'.+'
126
- elif part == '**\n ' :
127
- # '**/' component: we use '(?s:.)' rather than '.' so that path
128
- # separators (i.e. newlines) are matched. The trailing '^' ensures
129
- # we terminate after a path separator (i.e. on a new line).
130
- part = r'(?s:.)*^'
131
- elif part == '**' :
132
- # '**' component.
133
- part = r'(?s:.)*'
134
- elif '**' in part :
135
- raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
136
- else :
137
- # Any other component: pass to fnmatch.translate(). We slice off
138
- # the common prefix and suffix added by translate() to ensure that
139
- # re.DOTALL is not set, and the end of the string not matched,
140
- # respectively. With DOTALL not set, '*' wildcards will not match
141
- # path separators, because the '.' characters in the pattern will
142
- # not match newlines.
143
- part = fnmatch .translate (part )[_FNMATCH_SLICE ]
144
- parts .append (part )
145
- # Match the end of the path, always.
146
- parts .append (r'\Z' )
147
- flags = re .MULTILINE
148
- if not case_sensitive :
149
- flags |= re .IGNORECASE
150
- return re .compile ('' .join (parts ), flags = flags )
84
+ regex = glob .translate (pat , recursive = True , include_hidden = True , seps = sep )
85
+ # The string representation of an empty path is a single dot ('.'). Empty
86
+ # paths shouldn't match wildcards, so we consume it with an atomic group.
87
+ regex = r'(\.\Z)?+' + regex
88
+ return re .compile (regex , flags ).match
151
89
152
90
153
91
def _select_children (parent_paths , dir_only , follow_symlinks , match ):
@@ -171,7 +109,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
171
109
except OSError :
172
110
continue
173
111
name = entry .name
174
- if match is None or match (name ):
112
+ if match (name ):
175
113
yield parent_path ._make_child_relpath (name )
176
114
177
115
@@ -297,10 +235,6 @@ class PurePath:
297
235
# to implement comparison methods like `__lt__()`.
298
236
'_parts_normcase_cached' ,
299
237
300
- # The `_lines_cached` slot stores the string path with path separators
301
- # and newlines swapped. This is used to implement `match()`.
302
- '_lines_cached' ,
303
-
304
238
# The `_hash` slot stores the hash of the case-normalized string
305
239
# path. It's set when `__hash__()` is called for the first time.
306
240
'_hash' ,
@@ -475,20 +409,6 @@ def _parts_normcase(self):
475
409
self ._parts_normcase_cached = self ._str_normcase .split (self .pathmod .sep )
476
410
return self ._parts_normcase_cached
477
411
478
- @property
479
- def _lines (self ):
480
- # Path with separators and newlines swapped, for pattern matching.
481
- try :
482
- return self ._lines_cached
483
- except AttributeError :
484
- path_str = str (self )
485
- if path_str == '.' :
486
- self ._lines_cached = ''
487
- else :
488
- trans = _SWAP_SEP_AND_NEWLINE [self .pathmod .sep ]
489
- self ._lines_cached = path_str .translate (trans )
490
- return self ._lines_cached
491
-
492
412
def __eq__ (self , other ):
493
413
if not isinstance (other , PurePath ):
494
414
return NotImplemented
@@ -763,13 +683,16 @@ def match(self, path_pattern, *, case_sensitive=None):
763
683
path_pattern = self .with_segments (path_pattern )
764
684
if case_sensitive is None :
765
685
case_sensitive = _is_case_sensitive (self .pathmod )
766
- pattern = _compile_pattern_lines (path_pattern ._lines , case_sensitive )
686
+ sep = path_pattern .pathmod .sep
687
+ pattern_str = str (path_pattern )
767
688
if path_pattern .drive or path_pattern .root :
768
- return pattern . match ( self . _lines ) is not None
689
+ pass
769
690
elif path_pattern ._tail :
770
- return pattern . search ( self . _lines ) is not None
691
+ pattern_str = f'** { sep } { pattern_str } '
771
692
else :
772
693
raise ValueError ("empty pattern" )
694
+ match = _compile_pattern (pattern_str , sep , case_sensitive )
695
+ return match (str (self )) is not None
773
696
774
697
775
698
# Subclassing os.PathLike makes isinstance() checks slower,
@@ -1069,26 +992,19 @@ def _scandir(self):
1069
992
return contextlib .nullcontext (self .iterdir ())
1070
993
1071
994
def _make_child_relpath (self , name ):
1072
- sep = self .pathmod .sep
1073
- lines_name = name .replace ('\n ' , sep )
1074
- lines_str = self ._lines
1075
995
path_str = str (self )
1076
996
tail = self ._tail
1077
997
if tail :
1078
- path_str = f'{ path_str } { sep } { name } '
1079
- lines_str = f'{ lines_str } \n { lines_name } '
998
+ path_str = f'{ path_str } { self .pathmod .sep } { name } '
1080
999
elif path_str != '.' :
1081
1000
path_str = f'{ path_str } { name } '
1082
- lines_str = f'{ lines_str } { lines_name } '
1083
1001
else :
1084
1002
path_str = name
1085
- lines_str = lines_name
1086
1003
path = self .with_segments (path_str )
1087
1004
path ._str = path_str
1088
1005
path ._drv = self .drive
1089
1006
path ._root = self .root
1090
1007
path ._tail_cached = tail + [name ]
1091
- path ._lines_cached = lines_str
1092
1008
return path
1093
1009
1094
1010
def glob (self , pattern , * , case_sensitive = None , follow_symlinks = None ):
@@ -1139,6 +1055,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
1139
1055
# do not perform any filesystem access, which can be much faster!
1140
1056
filter_paths = follow_symlinks is not None and '..' not in pattern_parts
1141
1057
deduplicate_paths = False
1058
+ sep = self .pathmod .sep
1142
1059
paths = iter ([self ] if self .is_dir () else [])
1143
1060
part_idx = 0
1144
1061
while part_idx < len (pattern_parts ):
@@ -1159,9 +1076,9 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
1159
1076
paths = _select_recursive (paths , dir_only , follow_symlinks )
1160
1077
1161
1078
# Filter out paths that don't match pattern.
1162
- prefix_len = len (self ._make_child_relpath ('_' ). _lines ) - 1
1163
- match = _compile_pattern_lines ( path_pattern . _lines , case_sensitive ). match
1164
- paths = (path for path in paths if match (path . _lines [ prefix_len :] ))
1079
+ prefix_len = len (str ( self ._make_child_relpath ('_' )) ) - 1
1080
+ match = _compile_pattern ( str ( path_pattern ), sep , case_sensitive )
1081
+ paths = (path for path in paths if match (str ( path ), prefix_len ))
1165
1082
return paths
1166
1083
1167
1084
dir_only = part_idx < len (pattern_parts )
@@ -1174,7 +1091,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
1174
1091
raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
1175
1092
else :
1176
1093
dir_only = part_idx < len (pattern_parts )
1177
- match = _compile_pattern (part , case_sensitive )
1094
+ match = _compile_pattern (part , sep , case_sensitive )
1178
1095
paths = _select_children (paths , dir_only , follow_symlinks , match )
1179
1096
return paths
1180
1097
0 commit comments