Skip to content

GH-77609: Add recurse_symlinks argument to pathlib.Path.glob() #117311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -985,7 +985,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None)
.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)

Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
Expand Down Expand Up @@ -1013,28 +1013,23 @@ call fails (for example because the path doesn't exist).
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.

By default, or when the *follow_symlinks* keyword-only argument is set to
``None``, this method follows symlinks except when expanding "``**``"
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
``False`` to treat all symlinks as files.

.. tip::
Set *follow_symlinks* to ``True`` or ``False`` to improve performance
of recursive globbing.
By default, or when the *recurse_symlinks* keyword-only argument is set to
``False``, this method follows symlinks except when expanding "``**``"
wildcards. Set *recurse_symlinks* to ``True`` to always follow symlinks.

.. audit-event:: pathlib.Path.glob self,pattern pathlib.Path.glob

.. versionchanged:: 3.12
The *case_sensitive* parameter was added.

.. versionchanged:: 3.13
The *follow_symlinks* parameter was added.
The *recurse_symlinks* parameter was added.

.. versionchanged:: 3.13
The *pattern* parameter accepts a :term:`path-like object`.


.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None)
.. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False)

Glob the given relative *pattern* recursively. This is like calling
:meth:`Path.glob` with "``**/``" added in front of the *pattern*.
Expand All @@ -1048,7 +1043,7 @@ call fails (for example because the path doesn't exist).
The *case_sensitive* parameter was added.

.. versionchanged:: 3.13
The *follow_symlinks* parameter was added.
The *recurse_symlinks* parameter was added.

.. versionchanged:: 3.13
The *pattern* parameter accepts a :term:`path-like object`.
Expand Down Expand Up @@ -1675,7 +1670,7 @@ The patterns accepted and results generated by :meth:`Path.glob` and
passing ``recursive=True`` to :func:`glob.glob`.
3. "``**``" pattern components do not follow symlinks by default in pathlib.
This behaviour has no equivalent in :func:`glob.glob`, but you can pass
``follow_symlinks=True`` to :meth:`Path.glob` for compatible behaviour.
``recurse_symlinks=True`` to :meth:`Path.glob` for compatible behaviour.
4. Like all :class:`PurePath` and :class:`Path` objects, the values returned
from :meth:`Path.glob` and :meth:`Path.rglob` don't include trailing
slashes.
Expand Down
11 changes: 7 additions & 4 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -559,12 +559,15 @@ pathlib
implementation of :mod:`os.path` used for low-level path parsing and
joining: either ``posixpath`` or ``ntpath``.

* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`,
:meth:`~pathlib.Path.rglob`, :meth:`~pathlib.Path.is_file`,
* Add *recurse_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`
and :meth:`~pathlib.Path.rglob`.
(Contributed by Barney Gale in :gh:`77609`).

* Add *follow_symlinks* keyword-only argument to :meth:`~pathlib.Path.is_file`,
:meth:`~pathlib.Path.is_dir`, :meth:`~pathlib.Path.owner`,
and :meth:`~pathlib.Path.group`.
(Contributed by Barney Gale in :gh:`77609` and :gh:`105793`, and
Kamil Turek in :gh:`107962`).
(Contributed by Barney Gale in :gh:`105793`, and Kamil Turek in
:gh:`107962`).

* Return files and directories from :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob` when given a pattern that ends with "``**``". In
Expand Down
8 changes: 4 additions & 4 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,17 +619,17 @@ def _make_child_relpath(self, name):
path._tail_cached = tail + [name]
return path

def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
sys.audit("pathlib.Path.glob", self, pattern)
if not isinstance(pattern, PurePath):
pattern = self.with_segments(pattern)
return _abc.PathBase.glob(
self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
self, pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)

def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
Expand All @@ -639,7 +639,7 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
pattern = self.with_segments(pattern)
pattern = '**' / pattern
return _abc.PathBase.glob(
self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
self, pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
Expand Down
20 changes: 8 additions & 12 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,8 @@ def _select_special(paths, part):
yield path._make_child_relpath(part)


def _select_children(parent_paths, dir_only, follow_symlinks, match):
def _select_children(parent_paths, dir_only, match):
"""Yield direct children of given paths, filtering by name and type."""
if follow_symlinks is None:
follow_symlinks = True
for parent_path in parent_paths:
try:
# We must close the scandir() object before proceeding to
Expand All @@ -82,7 +80,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
for entry in entries:
if dir_only:
try:
if not entry.is_dir(follow_symlinks=follow_symlinks):
if not entry.is_dir():
continue
except OSError:
continue
Expand All @@ -96,8 +94,6 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
"""Yield given paths and all their children, recursively, filtering by
string and type.
"""
if follow_symlinks is None:
follow_symlinks = False
for parent_path in parent_paths:
if match is not None:
# If we're filtering paths through a regex, record the length of
Expand Down Expand Up @@ -789,7 +785,7 @@ def _make_child_direntry(self, entry):
def _make_child_relpath(self, name):
return self.joinpath(name)

def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand Down Expand Up @@ -818,7 +814,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
# Consume following non-special components, provided symlinks
# are also followed when expanding '**' (i.e. they are treated
# consistently throughout the pattern). Each component is joined
# onto 'part', which is used to generate an re.Pattern object.
if follow_symlinks is not None:
if recurse_symlinks:
while stack and stack[-1] not in specials:
part += sep + stack.pop()

Expand All @@ -827,7 +823,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None

# Recursively walk directories, filtering by type and regex.
paths = _select_recursive(paths, bool(stack), follow_symlinks, match)
paths = _select_recursive(paths, bool(stack), recurse_symlinks, match)

# De-duplicate if we've already seen a '**' component.
if deduplicate_paths:
Expand All @@ -843,18 +839,18 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None

# Iterate over directories' children filtering by type and regex.
paths = _select_children(paths, bool(stack), follow_symlinks, match)
paths = _select_children(paths, bool(stack), match)
return paths

def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
"""
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
pattern = '**' / pattern
return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
Expand Down
69 changes: 7 additions & 62 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1776,9 +1776,9 @@ def _check(path, pattern, case_sensitive, expected):
_check(path, "dirb/file*", False, ["dirB/fileB"])

@needs_symlinks
def test_glob_follow_symlinks_common(self):
def test_glob_recurse_symlinks_common(self):
def _check(path, glob, expected):
actual = {path for path in path.glob(glob, follow_symlinks=True)
actual = {path for path in path.glob(glob, recurse_symlinks=True)
if path.parts.count("linkD") <= 1} # exclude symlink loop.
self.assertEqual(actual, { P(self.base, q) for q in expected })
P = self.cls
Expand Down Expand Up @@ -1812,39 +1812,9 @@ def _check(path, glob, expected):
_check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"])
_check(p, "*/dirD/**/", ["dirC/dirD/"])

@needs_symlinks
def test_glob_no_follow_symlinks_common(self):
def _check(path, glob, expected):
actual = {path for path in path.glob(glob, follow_symlinks=False)}
self.assertEqual(actual, { P(self.base, q) for q in expected })
P = self.cls
p = P(self.base)
_check(p, "fileB", [])
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
_check(p, "*A", ["dirA", "fileA", "linkA"])
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
_check(p, "*/fileB", ["dirB/fileB"])
_check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"])
_check(p, "dir*/*/..", ["dirC/dirD/.."])
_check(p, "dir*/**", [
"dirA/", "dirA/linkC",
"dirB/", "dirB/fileB", "dirB/linkD",
"dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt",
"dirE/"])
_check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"])
_check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."])
_check(p, "dir*/*/**", ["dirC/dirD/", "dirC/dirD/fileD"])
_check(p, "dir*/*/**/", ["dirC/dirD/"])
_check(p, "dir*/*/**/..", ["dirC/dirD/.."])
_check(p, "dir*/**/fileC", ["dirC/fileC"])
_check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD/", "dirC/dirD/../dirD/fileD"])
_check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"])
_check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"])
_check(p, "*/dirD/**/", ["dirC/dirD/"])

def test_rglob_follow_symlinks_none(self):
def test_rglob_recurse_symlinks_false(self):
def _check(path, glob, expected):
actual = set(path.rglob(glob, follow_symlinks=None))
actual = set(path.rglob(glob, recurse_symlinks=False))
self.assertEqual(actual, { P(self.base, q) for q in expected })
P = self.cls
p = P(self.base)
Expand Down Expand Up @@ -1901,9 +1871,9 @@ def test_rglob_windows(self):
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})

@needs_symlinks
def test_rglob_follow_symlinks_common(self):
def test_rglob_recurse_symlinks_common(self):
def _check(path, glob, expected):
actual = {path for path in path.rglob(glob, follow_symlinks=True)
actual = {path for path in path.rglob(glob, recurse_symlinks=True)
if path.parts.count("linkD") <= 1} # exclude symlink loop.
self.assertEqual(actual, { P(self.base, q) for q in expected })
P = self.cls
Expand Down Expand Up @@ -1932,37 +1902,12 @@ def _check(path, glob, expected):
_check(p, "*.txt", ["dirC/novel.txt"])
_check(p, "*.*", ["dirC/novel.txt"])

@needs_symlinks
def test_rglob_no_follow_symlinks_common(self):
def _check(path, glob, expected):
actual = {path for path in path.rglob(glob, follow_symlinks=False)}
self.assertEqual(actual, { P(self.base, q) for q in expected })
P = self.cls
p = P(self.base)
_check(p, "fileB", ["dirB/fileB"])
_check(p, "*/fileA", [])
_check(p, "*/fileB", ["dirB/fileB"])
_check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ])
_check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"])
_check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"])

p = P(self.base, "dirC")
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
"dirC/dirD", "dirC/dirD/fileD"])
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
_check(p, "*/*", ["dirC/dirD/fileD"])
_check(p, "*/", ["dirC/dirD/"])
_check(p, "", ["dirC/", "dirC/dirD/"])
# gh-91616, a re module regression
_check(p, "*.txt", ["dirC/novel.txt"])
_check(p, "*.*", ["dirC/novel.txt"])

@needs_symlinks
def test_rglob_symlink_loop(self):
# Don't get fooled by symlink loops (Issue #26012).
P = self.cls
p = P(self.base)
given = set(p.rglob('*', follow_symlinks=None))
given = set(p.rglob('*', recurse_symlinks=False))
expect = {'brokenLink',
'dirA', 'dirA/linkC',
'dirB', 'dirB/fileB', 'dirB/linkD',
Expand Down
Loading