@@ -63,6 +63,12 @@ def _compile_pattern(pat, sep, case_sensitive):
63
63
return re .compile (regex , flags = flags ).match
64
64
65
65
66
+ def _select_special (paths , part ):
67
+ """Yield special literal children of the given paths."""
68
+ for path in paths :
69
+ yield path ._make_child_relpath (part )
70
+
71
+
66
72
def _select_children (parent_paths , dir_only , follow_symlinks , match ):
67
73
"""Yield direct children of given paths, filtering by name and type."""
68
74
if follow_symlinks is None :
@@ -84,7 +90,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
84
90
except OSError :
85
91
continue
86
92
if match (entry .name ):
87
- yield parent_path ._make_child_entry (entry , dir_only )
93
+ yield parent_path ._make_child_entry (entry )
88
94
89
95
90
96
def _select_recursive (parent_paths , dir_only , follow_symlinks ):
@@ -107,7 +113,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
107
113
for entry in entries :
108
114
try :
109
115
if entry .is_dir (follow_symlinks = follow_symlinks ):
110
- paths .append (path ._make_child_entry (entry , dir_only ))
116
+ paths .append (path ._make_child_entry (entry ))
111
117
continue
112
118
except OSError :
113
119
pass
@@ -427,6 +433,14 @@ def is_absolute(self):
427
433
a drive)."""
428
434
return self .pathmod .isabs (self ._raw_path )
429
435
436
+ @property
437
+ def _pattern_stack (self ):
438
+ """Stack of path components, to be used with patterns in glob()."""
439
+ anchor , parts = self ._stack
440
+ if anchor :
441
+ raise NotImplementedError ("Non-relative patterns are unsupported" )
442
+ return parts
443
+
430
444
def match (self , path_pattern , * , case_sensitive = None ):
431
445
"""
432
446
Return True if this path matches the given pattern.
@@ -436,11 +450,10 @@ def match(self, path_pattern, *, case_sensitive=None):
436
450
if case_sensitive is None :
437
451
case_sensitive = _is_case_sensitive (self .pathmod )
438
452
sep = path_pattern .pathmod .sep
439
- pattern_str = str (path_pattern )
440
453
if path_pattern .anchor :
441
- pass
454
+ pattern_str = str ( path_pattern )
442
455
elif path_pattern .parts :
443
- pattern_str = f '**{ sep } { pattern_str } '
456
+ pattern_str = str ( '**' / path_pattern )
444
457
else :
445
458
raise ValueError ("empty pattern" )
446
459
match = _compile_pattern (pattern_str , sep , case_sensitive )
@@ -714,10 +727,8 @@ def _scandir(self):
714
727
from contextlib import nullcontext
715
728
return nullcontext (self .iterdir ())
716
729
717
- def _make_child_entry (self , entry , is_dir = False ):
730
+ def _make_child_entry (self , entry ):
718
731
# Transform an entry yielded from _scandir() into a path object.
719
- if is_dir :
720
- return entry .joinpath ('' )
721
732
return entry
722
733
723
734
def _make_child_relpath (self , name ):
@@ -727,57 +738,35 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
727
738
"""Iterate over this subtree and yield all existing files (of any
728
739
kind, including directories) matching the given relative pattern.
729
740
"""
730
- path_pattern = self .with_segments (pattern )
731
- if path_pattern .anchor :
732
- raise NotImplementedError ("Non-relative patterns are unsupported" )
733
- elif not path_pattern .parts :
734
- raise ValueError ("Unacceptable pattern: {!r}" .format (pattern ))
735
-
736
- pattern_parts = list (path_pattern .parts )
737
- if not self .pathmod .split (pattern )[1 ]:
738
- # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
739
- pattern_parts .append ('' )
740
-
741
+ if not isinstance (pattern , PurePathBase ):
742
+ pattern = self .with_segments (pattern )
741
743
if case_sensitive is None :
742
744
# TODO: evaluate case-sensitivity of each directory in _select_children().
743
745
case_sensitive = _is_case_sensitive (self .pathmod )
744
746
745
- # If symlinks are handled consistently, and the pattern does not
746
- # contain '..' components, then we can use a 'walk-and-match' strategy
747
- # when expanding '**' wildcards. When a '**' wildcard is encountered,
748
- # all following pattern parts are immediately consumed and used to
749
- # build a `re.Pattern` object. This pattern is used to filter the
750
- # recursive walk. As a result, pattern parts following a '**' wildcard
751
- # do not perform any filesystem access, which can be much faster!
752
- filter_paths = follow_symlinks is not None and '..' not in pattern_parts
747
+ stack = pattern ._pattern_stack
748
+ specials = ('' , '.' , '..' )
749
+ filter_paths = False
753
750
deduplicate_paths = False
754
751
sep = self .pathmod .sep
755
752
paths = iter ([self .joinpath ('' )] if self .is_dir () else [])
756
- part_idx = 0
757
- while part_idx < len (pattern_parts ):
758
- part = pattern_parts [part_idx ]
759
- part_idx += 1
760
- if part == '' :
761
- # Trailing slash.
762
- pass
763
- elif part == '..' :
764
- paths = (path ._make_child_relpath ('..' ) for path in paths )
753
+ while stack :
754
+ part = stack .pop ()
755
+ if part in specials :
756
+ paths = _select_special (paths , part )
765
757
elif part == '**' :
766
758
# Consume adjacent '**' components.
767
- while part_idx < len (pattern_parts ) and pattern_parts [part_idx ] == '**' :
768
- part_idx += 1
769
-
770
- if filter_paths and part_idx < len (pattern_parts ) and pattern_parts [part_idx ] != '' :
771
- dir_only = pattern_parts [- 1 ] == ''
772
- paths = _select_recursive (paths , dir_only , follow_symlinks )
759
+ while stack and stack [- 1 ] == '**' :
760
+ stack .pop ()
773
761
774
- # Filter out paths that don't match pattern.
775
- prefix_len = len (str (self ._make_child_relpath ('_' ))) - 1
776
- match = _compile_pattern (str (path_pattern ), sep , case_sensitive )
777
- paths = (path for path in paths if match (str (path ), prefix_len ))
778
- return paths
762
+ # Consume adjacent non-special components and enable post-walk
763
+ # regex filtering, provided we're treating symlinks consistently.
764
+ if follow_symlinks is not None :
765
+ while stack and stack [- 1 ] not in specials :
766
+ filter_paths = True
767
+ stack .pop ()
779
768
780
- dir_only = part_idx < len ( pattern_parts )
769
+ dir_only = bool ( stack )
781
770
paths = _select_recursive (paths , dir_only , follow_symlinks )
782
771
if deduplicate_paths :
783
772
# De-duplicate if we've already seen a '**' component.
@@ -786,18 +775,25 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
786
775
elif '**' in part :
787
776
raise ValueError ("Invalid pattern: '**' can only be an entire path component" )
788
777
else :
789
- dir_only = part_idx < len ( pattern_parts )
778
+ dir_only = bool ( stack )
790
779
match = _compile_pattern (part , sep , case_sensitive )
791
780
paths = _select_children (paths , dir_only , follow_symlinks , match )
781
+ if filter_paths :
782
+ # Filter out paths that don't match pattern.
783
+ prefix_len = len (str (self ._make_child_relpath ('_' ))) - 1
784
+ match = _compile_pattern (str (pattern ), sep , case_sensitive )
785
+ paths = (path for path in paths if match (str (path ), prefix_len ))
792
786
return paths
793
787
794
788
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
    """Glob for the given relative pattern at every depth of this subtree.

    Yields all existing files (of any kind, including directories) that
    match. This is glob() with a '**' component prepended to the pattern;
    a pattern that is not already a PurePathBase instance is first
    converted with with_segments().
    """
    recursive_pattern = '**' / (
        pattern if isinstance(pattern, PurePathBase)
        else self.with_segments(pattern)
    )
    return self.glob(
        recursive_pattern,
        case_sensitive=case_sensitive,
        follow_symlinks=follow_symlinks,
    )
801
797
802
798
def walk (self , top_down = True , on_error = None , follow_symlinks = False ):
803
799
"""Walk the directory tree from this directory, similar to os.walk()."""
0 commit comments