Skip to content

Commit

Permalink
pathlib ABCs: defer path joining
Browse files Browse the repository at this point in the history
Defer joining of path segments in the private `PurePathBase` ABC. The new
behaviour matches how the public `PurePath` class handles path segments.

This slightly reduces the size of `PurePath` objects by eliminating a
`_raw_path` slot.
  • Loading branch information
barneygale committed Nov 4, 2024
1 parent 9b7294c commit ea98944
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 48 deletions.
66 changes: 44 additions & 22 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class PathGlobber(_GlobberBase):
@staticmethod
def concat_path(path, text):
"""Appends text to the given path."""
return path.with_segments(path._raw_path + text)
return path.with_segments(str(path) + text)


class PurePathBase:
Expand All @@ -112,9 +112,13 @@ class PurePathBase:
"""

__slots__ = (
# The `_raw_path` slot stores a joined string path. This is set in the
# `__init__()` method.
'_raw_path',
# The `_raw_paths` slot stores unjoined string paths. This is set in
# the `__init__()` method.
'_raw_paths',

# The `_str` slot stores the string representation of the path,
# computed when `__str__()` is called for the first time.
'_str',

# The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `PathBase.resolve()`. This prevents duplicate
Expand All @@ -124,11 +128,14 @@ class PurePathBase:
parser = ParserBase()
_globber = PathGlobber

def __init__(self, path, *paths):
self._raw_path = self.parser.join(path, *paths) if paths else path
if not isinstance(self._raw_path, str):
raise TypeError(
f"path should be a str, not {type(self._raw_path).__name__!r}")
def __init__(self, arg, *args):
# Keep the segments unjoined; joining is deferred to the `_raw_path`
# property instead of happening at construction time.
paths = [arg]
paths.extend(args)
for path in paths:
# Every segment must be a str; anything else raises TypeError before
# the segments are stored.
if not isinstance(path, str):
raise TypeError(
f"path should be a str, not {type(path).__name__!r}")
self._raw_paths = paths
self._resolving = False

def with_segments(self, *pathsegments):
Expand All @@ -138,10 +145,25 @@ def with_segments(self, *pathsegments):
"""
return type(self)(*pathsegments)

@property
def _raw_path(self):
# The joined but unnormalized path, built from `_raw_paths` on access.
paths = self._raw_paths
if len(paths) == 0:
# No segments: the path is the empty string.
path = ''
elif len(paths) == 1:
# A single segment is returned as-is, without calling the parser.
path = paths[0]
else:
# Two or more segments are combined by the parser's join().
path = self.parser.join(*paths)
return path

def __str__(self):
"""Return the string representation of the path, suitable for
passing to system calls."""
return self._raw_path
try:
return self._str
except AttributeError:
self._str = self._raw_path
return self._str

def as_posix(self):
"""Return the string representation of the path with forward (/)
Expand All @@ -166,7 +188,7 @@ def anchor(self):
@property
def name(self):
"""The final path component, if any."""
return self.parser.split(self._raw_path)[1]
return self.parser.split(str(self))[1]

@property
def suffix(self):
Expand Down Expand Up @@ -202,7 +224,7 @@ def with_name(self, name):
split = self.parser.split
if split(name)[0]:
raise ValueError(f"Invalid name {name!r}")
return self.with_segments(split(self._raw_path)[0], name)
return self.with_segments(split(str(self))[0], name)

def with_stem(self, stem):
"""Return a new path with the stem changed."""
Expand Down Expand Up @@ -242,17 +264,17 @@ def relative_to(self, other, *, walk_up=False):
anchor0, parts0 = self._stack
anchor1, parts1 = other._stack
if anchor0 != anchor1:
raise ValueError(f"{self._raw_path!r} and {other._raw_path!r} have different anchors")
raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors")
while parts0 and parts1 and parts0[-1] == parts1[-1]:
parts0.pop()
parts1.pop()
for part in parts1:
if not part or part == '.':
pass
elif not walk_up:
raise ValueError(f"{self._raw_path!r} is not in the subpath of {other._raw_path!r}")
raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
elif part == '..':
raise ValueError(f"'..' segment in {other._raw_path!r} cannot be walked")
raise ValueError(f"'..' segment in {str(other)!r} cannot be walked")
else:
parts0.append('..')
return self.with_segments('', *reversed(parts0))
Expand Down Expand Up @@ -289,17 +311,17 @@ def joinpath(self, *pathsegments):
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(self._raw_path, *pathsegments)
return self.with_segments(*self._raw_paths, *pathsegments)

def __truediv__(self, key):
try:
return self.with_segments(self._raw_path, key)
return self.with_segments(*self._raw_paths, key)
except TypeError:
return NotImplemented

def __rtruediv__(self, key):
try:
return self.with_segments(key, self._raw_path)
return self.with_segments(key, *self._raw_paths)
except TypeError:
return NotImplemented

Expand All @@ -311,7 +333,7 @@ def _stack(self):
*parts* is a reversed list of parts following the anchor.
"""
split = self.parser.split
path = self._raw_path
path = str(self)
parent, name = split(path)
names = []
while path != parent:
Expand All @@ -323,7 +345,7 @@ def _stack(self):
@property
def parent(self):
"""The logical parent of the path."""
path = self._raw_path
path = str(self)
parent = self.parser.split(path)[0]
if path != parent:
parent = self.with_segments(parent)
Expand All @@ -335,7 +357,7 @@ def parent(self):
def parents(self):
"""A sequence of this path's logical parents."""
split = self.parser.split
path = self._raw_path
path = str(self)
parent = split(path)[0]
parents = []
while path != parent:
Expand All @@ -347,7 +369,7 @@ def parents(self):
def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
return self.parser.isabs(self._raw_path)
return self.parser.isabs(str(self))

@property
def _pattern_str(self):
Expand Down
21 changes: 0 additions & 21 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,6 @@ class PurePath(PurePathBase):
"""

__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',

# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
Expand All @@ -81,11 +77,6 @@ class PurePath(PurePathBase):
# tail are normalized.
'_drv', '_root', '_tail_cached',

# The `_str` slot stores the string representation of the path,
# computed from the drive, root and tail when `__str__()` is called
# for the first time. It's used to implement `_str_normcase`
'_str',

# The `_str_normcase_cached` slot stores the string path with
# normalized case. It is set when the `_str_normcase` property is
# accessed for the first time. It's used to implement `__eq__()`
Expand Down Expand Up @@ -299,18 +290,6 @@ def _parse_pattern(cls, pattern):
parts.append('')
return parts

@property
def _raw_path(self):
"""The joined but unnormalized path."""
paths = self._raw_paths
if len(paths) == 0:
path = ''
elif len(paths) == 1:
path = paths[0]
else:
path = self.parser.join(*paths)
return path

@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
Expand Down
5 changes: 0 additions & 5 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,6 @@ def test_unsupported_operation_pure(self):
p.suffix
with self.assertRaises(e):
p.suffixes
with self.assertRaises(e):
p / 'bar'
with self.assertRaises(e):
'bar' / p
self.assertRaises(e, p.joinpath, 'bar')
self.assertRaises(e, p.with_name, 'bar')
self.assertRaises(e, p.with_stem, 'bar')
self.assertRaises(e, p.with_suffix, '.txt')
Expand Down

0 comments on commit ea98944

Please sign in to comment.