From 260843df1bd8a28596b9a377d8266e2547f7eedc Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 1 Nov 2024 01:19:01 +0000 Subject: [PATCH] GH-125413: Add `pathlib.Path.scandir()` method (#126060) Add `pathlib.Path.scandir()` as a trivial wrapper of `os.scandir()`. This will be used to implement several `PathBase` methods more efficiently, including methods that provide `Path.copy()`. --- Doc/library/pathlib.rst | 29 ++++++++ Doc/whatsnew/3.14.rst | 6 ++ Lib/pathlib/_abc.py | 12 +++- Lib/pathlib/_local.py | 8 +++ Lib/test/test_pathlib/test_pathlib_abc.py | 67 ++++++++++++++++--- ...-10-28-01-24-52.gh-issue-125413.Jat5kq.rst | 3 + 6 files changed, 114 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 4380122eb1be7d..b6fb36554f7cec 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1289,6 +1289,35 @@ Reading directories raised. +.. method:: Path.scandir() + + When the path points to a directory, return an iterator of + :class:`os.DirEntry` objects corresponding to entries in the directory. The + returned iterator supports the :term:`context manager` protocol. It is + implemented using :func:`os.scandir` and gives the same guarantees. + + Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can + significantly increase the performance of code that also needs file type or + file attribute information, because :class:`os.DirEntry` objects expose + this information if the operating system provides it when scanning a + directory. + + The following example displays the names of subdirectories. The + ``entry.is_dir()`` check will generally not make an additional system call:: + + >>> p = Path('docs') + >>> with p.scandir() as entries: + ... for entry in entries: + ... if entry.is_dir(): + ... entry.name + ... + '_templates' + '_build' + '_static' + + .. versionadded:: 3.14 + + .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False) Glob the given relative *pattern* in the directory represented by this path, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7f9e3107a6e1a0..48314f9c98c036 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -380,6 +380,12 @@ pathlib (Contributed by Barney Gale in :gh:`73991`.) +* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator + of :class:`os.DirEntry` objects. This is exactly equivalent to calling + :func:`os.scandir` on a path object. + + (Contributed by Barney Gale in :gh:`125413`.) + pdb --- diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 11c8018b28f26b..dfff8b460d1bf1 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -639,13 +639,23 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) + def scandir(self): + """Yield os.DirEntry objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + raise UnsupportedOperation(self._unsupported_msg('scandir()')) + def iterdir(self): """Yield path objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - raise UnsupportedOperation(self._unsupported_msg('iterdir()')) + with self.scandir() as entries: + names = [entry.name for entry in entries] + return map(self.joinpath, names) def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index a78997179820b1..ef072b83d96904 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -615,6 +615,14 @@ def _filter_trailing_slash(self, paths): path_str = path_str[:-1] yield path_str + def scandir(self): + """Yield os.DirEntry objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + return os.scandir(self) + def iterdir(self): """Yield path objects of the directory contents. diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 08355a71453807..11e34f5d378a58 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1,4 +1,5 @@ import collections +import contextlib import io import os import errno @@ -1424,6 +1425,24 @@ def close(self): 'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime') +class DummyDirEntry: + """ + Minimal os.DirEntry-like object. Returned from DummyPath.scandir(). + """ + __slots__ = ('name', '_is_symlink', '_is_dir') + + def __init__(self, name, is_symlink, is_dir): + self.name = name + self._is_symlink = is_symlink + self._is_dir = is_dir + + def is_symlink(self): + return self._is_symlink + + def is_dir(self, *, follow_symlinks=True): + return self._is_dir and (follow_symlinks or not self._is_symlink) + + class DummyPath(PathBase): """ Simple implementation of PathBase that keeps files and directories in @@ -1491,14 +1510,25 @@ def open(self, mode='r', buffering=-1, encoding=None, stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) return stream - def iterdir(self): - path = str(self.resolve()) - if path in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) - elif path in self._directories: - return iter([self / name for name in self._directories[path]]) + @contextlib.contextmanager + def scandir(self): + path = self.resolve() + path_str = str(path) + if path_str in self._files: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str) + elif path_str in self._directories: + yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]]) else: - raise FileNotFoundError(errno.ENOENT, "File not found", path) + raise FileNotFoundError(errno.ENOENT, "File not found", path_str) + + @property + def _dir_entry(self): + path_str = str(self) + is_symlink = path_str in self._symlinks + is_directory = (path_str in self._directories + if not is_symlink + else self._symlinks[path_str][1]) + return DummyDirEntry(self.name, is_symlink, is_directory) def mkdir(self, mode=0o777, parents=False, exist_ok=False): path = str(self.parent.resolve() / self.name) @@ -1602,7 +1632,7 @@ def setUp(self): if self.can_symlink: p.joinpath('linkA').symlink_to('fileA') p.joinpath('brokenLink').symlink_to('non-existing') - p.joinpath('linkB').symlink_to('dirB') + p.joinpath('linkB').symlink_to('dirB', target_is_directory=True) p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB')) p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB')) p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop') @@ -2187,6 +2217,23 @@ def test_iterdir_nodir(self): self.assertIn(cm.exception.errno, (errno.ENOTDIR, errno.ENOENT, errno.EINVAL)) + def test_scandir(self): + p = self.cls(self.base) + with p.scandir() as entries: + self.assertTrue(list(entries)) + with p.scandir() as entries: + for entry in entries: + child = p / entry.name + self.assertIsNotNone(entry) + self.assertEqual(entry.name, child.name) + self.assertEqual(entry.is_symlink(), + child.is_symlink()) + self.assertEqual(entry.is_dir(follow_symlinks=False), + child.is_dir(follow_symlinks=False)) + if entry.name != 'brokenLinkLoop': + self.assertEqual(entry.is_dir(), child.is_dir()) + + def test_glob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(self.base, q) for q in expected }) @@ -3038,7 +3085,7 @@ class DummyPathWithSymlinks(DummyPath): def readlink(self): path = str(self.parent.resolve() / self.name) if path in self._symlinks: - return self.with_segments(self._symlinks[path]) + return self.with_segments(self._symlinks[path][0]) elif path in self._files or path in self._directories: raise OSError(errno.EINVAL, "Not a symlink", path) else: @@ -3050,7 +3097,7 @@ def symlink_to(self, target, target_is_directory=False): if path in self._symlinks: raise FileExistsError(errno.EEXIST, "File exists", path) self._directories[parent].add(self.name) - self._symlinks[path] = str(target) + self._symlinks[path] = str(target), target_is_directory class DummyPathWithSymlinksTest(DummyPathTest): diff --git a/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst new file mode 100644 index 00000000000000..ddf1f9725d9695 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-28-01-24-52.gh-issue-125413.Jat5kq.rst @@ -0,0 +1,3 @@ +Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory +children and their file attributes. This is a trivial wrapper of +:func:`os.scandir`.