Skip to content

GH-125413: Add pathlib.Path.scandir() method #126060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,35 @@ Reading directories
raised.


.. method:: Path.scandir()

When the path points to a directory, return an iterator of
:class:`os.DirEntry` objects corresponding to entries in the directory. The
returned iterator supports the :term:`context manager` protocol. It is
implemented using :func:`os.scandir` and gives the same guarantees.

Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
significantly increase the performance of code that also needs file type or
file attribute information, because :class:`os.DirEntry` objects expose
this information if the operating system provides it when scanning a
directory.

The following example displays the names of subdirectories. The
``entry.is_dir()`` check will generally not make an additional system call::

>>> p = Path('docs')
>>> with p.scandir() as entries:
...     for entry in entries:
...         if entry.is_dir():
...             entry.name
...
'_templates'
'_build'
'_static'

.. versionadded:: 3.14


.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)

Glob the given relative *pattern* in the directory represented by this path,
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,12 @@ pathlib

(Contributed by Barney Gale in :gh:`73991`.)

* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
of :class:`os.DirEntry` objects. This is exactly equivalent to calling
:func:`os.scandir` on a path object.

(Contributed by Barney Gale in :gh:`125413`.)


pdb
---
Expand Down
12 changes: 11 additions & 1 deletion Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,13 +639,23 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)

def scandir(self):
    """Return an iterator of os.DirEntry objects for the directory contents.

    Entries are produced in arbitrary order; the special entries '.' and
    '..' are never among them.  This implementation does not support the
    operation and always raises UnsupportedOperation.
    """
    message = self._unsupported_msg('scandir()')
    raise UnsupportedOperation(message)

def iterdir(self):
    """Return an iterator of path objects for the directory contents.

    The children are produced in arbitrary order, and the special entries
    '.' and '..' are not included.  The listing is obtained from
    scandir(), so any exception raised by scandir() propagates from here.
    """
    # Gather the names while the scandir() context is open so that it is
    # exited before any child path is handed to the caller.
    with self.scandir() as entries:
        names = [entry.name for entry in entries]
    # Children are built lazily by joining each collected name onto self.
    return map(self.joinpath, names)

def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None:
Expand Down
8 changes: 8 additions & 0 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths):
path_str = path_str[:-1]
yield path_str

def scandir(self):
    """Return an iterator of os.DirEntry objects for the directory contents.

    This simply delegates to os.scandir() on this path.  Entries are
    produced in arbitrary order; the special entries '.' and '..' are
    never among them.
    """
    entries = os.scandir(self)
    return entries

def iterdir(self):
"""Yield path objects of the directory contents.

Expand Down
67 changes: 57 additions & 10 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import collections
import contextlib
import io
import os
import errno
Expand Down Expand Up @@ -1424,6 +1425,24 @@ def close(self):
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')


class DummyDirEntry:
    """
    Bare-bones stand-in for os.DirEntry, produced by DummyPath.scandir().

    Stores the entry name together with two precomputed flags and answers
    is_symlink() / is_dir() queries from them.
    """
    __slots__ = ('name', '_is_symlink', '_is_dir')

    def __init__(self, name, is_symlink, is_dir):
        self._is_dir = is_dir
        self._is_symlink = is_symlink
        self.name = name

    def is_symlink(self):
        """Return the stored symlink flag."""
        return self._is_symlink

    def is_dir(self, *, follow_symlinks=True):
        """Report whether the entry is a directory.

        With follow_symlinks false, an entry flagged as a symlink is never
        reported as a directory.
        """
        if not self._is_dir:
            return self._is_dir
        if follow_symlinks:
            return follow_symlinks
        return not self._is_symlink


class DummyPath(PathBase):
"""
Simple implementation of PathBase that keeps files and directories in
Expand Down Expand Up @@ -1491,14 +1510,25 @@ def open(self, mode='r', buffering=-1, encoding=None,
stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline)
return stream

def iterdir(self):
path = str(self.resolve())
if path in self._files:
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
elif path in self._directories:
return iter([self / name for name in self._directories[path]])
@contextlib.contextmanager
def scandir(self):
    """Context manager yielding an iterator of entries for this directory.

    The path is resolved and looked up in the in-memory tables.  Each
    child name recorded for the directory is turned into its
    ``_dir_entry`` object.

    Raises NotADirectoryError when the resolved path is a known file, and
    FileNotFoundError when it is neither a known file nor a known
    directory.
    """
    path = self.resolve()
    path_str = str(path)
    if path_str in self._files:
        raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str)
    elif path_str in self._directories:
        yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]])
    else:
        # Report the filename as a string, matching the ENOTDIR branch
        # above, rather than passing the resolved path object.
        raise FileNotFoundError(errno.ENOENT, "File not found", path_str)

@property
def _dir_entry(self):
    """Build the DummyDirEntry describing this path.

    A path recorded in ``_symlinks`` is reported as a symlink, and its
    directory flag is read from index 1 of the stored symlink record.
    Any other path is a directory exactly when it appears in
    ``_directories``.
    """
    key = str(self)
    if key in self._symlinks:
        return DummyDirEntry(self.name, True, self._symlinks[key][1])
    return DummyDirEntry(self.name, False, key in self._directories)

def mkdir(self, mode=0o777, parents=False, exist_ok=False):
path = str(self.parent.resolve() / self.name)
Expand Down Expand Up @@ -1602,7 +1632,7 @@ def setUp(self):
if self.can_symlink:
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
p.joinpath('linkB').symlink_to('dirB')
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
Expand Down Expand Up @@ -2187,6 +2217,23 @@ def test_iterdir_nodir(self):
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL))

def test_scandir(self):
    """Check that scandir() entries agree with the matching child paths."""
    p = self.cls(self.base)
    # The base directory must produce at least one entry.
    with p.scandir() as entries:
        self.assertTrue(list(entries))
    with p.scandir() as entries:
        for entry in entries:
            # Validate the entry before dereferencing it; placed after
            # ``entry.name`` this check could never fail.
            self.assertIsNotNone(entry)
            child = p / entry.name
            self.assertEqual(entry.name, child.name)
            self.assertEqual(entry.is_symlink(),
                             child.is_symlink())
            self.assertEqual(entry.is_dir(follow_symlinks=False),
                             child.is_dir(follow_symlinks=False))
            # 'brokenLinkLoop' is excluded from the symlink-following
            # comparison (presumably because it is a link to itself --
            # confirm against setUp).
            if entry.name != 'brokenLinkLoop':
                self.assertEqual(entry.is_dir(), child.is_dir())


def test_glob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(self.base, q) for q in expected })
Expand Down Expand Up @@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
def readlink(self):
path = str(self.parent.resolve() / self.name)
if path in self._symlinks:
return self.with_segments(self._symlinks[path])
return self.with_segments(self._symlinks[path][0])
elif path in self._files or path in self._directories:
raise OSError(errno.EINVAL, "Not a symlink", path)
else:
Expand All @@ -3050,7 +3097,7 @@ def symlink_to(self, target, target_is_directory=False):
if path in self._symlinks:
raise FileExistsError(errno.EEXIST, "File exists", path)
self._directories[parent].add(self.name)
self._symlinks[path] = str(target)
self._symlinks[path] = str(target), target_is_directory


class DummyPathWithSymlinksTest(DummyPathTest):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add the :meth:`pathlib.Path.scandir` method to efficiently fetch directory
children and their file attributes. This is a thin wrapper around
:func:`os.scandir`.
Loading