Skip to content

Commit

Permalink
GH-125413: Add pathlib.Path.scandir() method (#126060)
Browse files Browse the repository at this point in the history
Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`. This
will be used to implement several `PathBase` methods more efficiently,
including methods that provide `Path.copy()`.
  • Loading branch information
barneygale authored Nov 1, 2024
1 parent d0abd0b commit 260843d
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 11 deletions.
29 changes: 29 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,35 @@ Reading directories
raised.


.. method:: Path.scandir()

When the path points to a directory, return an iterator of
:class:`os.DirEntry` objects corresponding to entries in the directory. The
returned iterator supports the :term:`context manager` protocol. It is
implemented using :func:`os.scandir` and gives the same guarantees.

Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can
significantly increase the performance of code that also needs file type or
file attribute information, because :class:`os.DirEntry` objects expose
this information if the operating system provides it when scanning a
directory.

The following example displays the names of subdirectories. The
``entry.is_dir()`` check will generally not make an additional system call::

>>> p = Path('docs')
>>> with p.scandir() as entries:
... for entry in entries:
... if entry.is_dir():
... entry.name
...
'_templates'
'_build'
'_static'

.. versionadded:: 3.14


.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)

Glob the given relative *pattern* in the directory represented by this path,
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,12 @@ pathlib

(Contributed by Barney Gale in :gh:`73991`.)

* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator
of :class:`os.DirEntry` objects. This is exactly equivalent to calling
:func:`os.scandir` on a path object.

(Contributed by Barney Gale in :gh:`125413`.)


pdb
---
Expand Down
12 changes: 11 additions & 1 deletion Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,13 +639,23 @@ def write_text(self, data, encoding=None, errors=None, newline=None):
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
return f.write(data)

def scandir(self):
"""Yield os.DirEntry objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
raise UnsupportedOperation(self._unsupported_msg('scandir()'))

def iterdir(self):
"""Yield path objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
with self.scandir() as entries:
names = [entry.name for entry in entries]
return map(self.joinpath, names)

def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None:
Expand Down
8 changes: 8 additions & 0 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths):
path_str = path_str[:-1]
yield path_str

def scandir(self):
"""Yield os.DirEntry objects of the directory contents.
The children are yielded in arbitrary order, and the
special entries '.' and '..' are not included.
"""
return os.scandir(self)

def iterdir(self):
"""Yield path objects of the directory contents.
Expand Down
67 changes: 57 additions & 10 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import collections
import contextlib
import io
import os
import errno
Expand Down Expand Up @@ -1424,6 +1425,24 @@ def close(self):
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')


class DummyDirEntry:
"""
Minimal os.DirEntry-like object. Returned from DummyPath.scandir().
"""
__slots__ = ('name', '_is_symlink', '_is_dir')

def __init__(self, name, is_symlink, is_dir):
self.name = name
self._is_symlink = is_symlink
self._is_dir = is_dir

def is_symlink(self):
return self._is_symlink

def is_dir(self, *, follow_symlinks=True):
return self._is_dir and (follow_symlinks or not self._is_symlink)


class DummyPath(PathBase):
"""
Simple implementation of PathBase that keeps files and directories in
Expand Down Expand Up @@ -1491,14 +1510,25 @@ def open(self, mode='r', buffering=-1, encoding=None,
stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline)
return stream

def iterdir(self):
path = str(self.resolve())
if path in self._files:
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
elif path in self._directories:
return iter([self / name for name in self._directories[path]])
@contextlib.contextmanager
def scandir(self):
path = self.resolve()
path_str = str(path)
if path_str in self._files:
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str)
elif path_str in self._directories:
yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]])
else:
raise FileNotFoundError(errno.ENOENT, "File not found", path)
raise FileNotFoundError(errno.ENOENT, "File not found", path_str)

@property
def _dir_entry(self):
path_str = str(self)
is_symlink = path_str in self._symlinks
is_directory = (path_str in self._directories
if not is_symlink
else self._symlinks[path_str][1])
return DummyDirEntry(self.name, is_symlink, is_directory)

def mkdir(self, mode=0o777, parents=False, exist_ok=False):
path = str(self.parent.resolve() / self.name)
Expand Down Expand Up @@ -1602,7 +1632,7 @@ def setUp(self):
if self.can_symlink:
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
p.joinpath('linkB').symlink_to('dirB')
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
Expand Down Expand Up @@ -2187,6 +2217,23 @@ def test_iterdir_nodir(self):
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL))

def test_scandir(self):
p = self.cls(self.base)
with p.scandir() as entries:
self.assertTrue(list(entries))
with p.scandir() as entries:
for entry in entries:
child = p / entry.name
self.assertIsNotNone(entry)
self.assertEqual(entry.name, child.name)
self.assertEqual(entry.is_symlink(),
child.is_symlink())
self.assertEqual(entry.is_dir(follow_symlinks=False),
child.is_dir(follow_symlinks=False))
if entry.name != 'brokenLinkLoop':
self.assertEqual(entry.is_dir(), child.is_dir())


def test_glob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(self.base, q) for q in expected })
Expand Down Expand Up @@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath):
def readlink(self):
path = str(self.parent.resolve() / self.name)
if path in self._symlinks:
return self.with_segments(self._symlinks[path])
return self.with_segments(self._symlinks[path][0])
elif path in self._files or path in self._directories:
raise OSError(errno.EINVAL, "Not a symlink", path)
else:
Expand All @@ -3050,7 +3097,7 @@ def symlink_to(self, target, target_is_directory=False):
if path in self._symlinks:
raise FileExistsError(errno.EEXIST, "File exists", path)
self._directories[parent].add(self.name)
self._symlinks[path] = str(target)
self._symlinks[path] = str(target), target_is_directory


class DummyPathWithSymlinksTest(DummyPathTest):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory
children and their file attributes. This is a trivial wrapper of
:func:`os.scandir`.

0 comments on commit 260843d

Please sign in to comment.