From 1bc7df72be1c26f571170a18a15b1038a4359a13 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 28 Dec 2023 01:18:41 +0000 Subject: [PATCH 1/5] GH-113528: Deoptimise `pathlib._abc.PurePathBase.parent[s]` Replace use of `_from_parsed_parts()` with `with_segments()`, and move assignments to `_drv`, `_root`, `_tail_cached` and `_str` slots into `PurePath`. --- Lib/pathlib/__init__.py | 49 +++++++++++++++++++++++++++++++++++++ Lib/pathlib/_abc.py | 54 ++++++++++------------------------------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index ab87b49d0277f3..329cb8a45f29cd 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -9,6 +9,7 @@ import ntpath import os import posixpath +from _collections_abc import Sequence try: import pwd @@ -29,6 +30,35 @@ ] +class _PathParents(Sequence): + """This object provides sequence-like access to the logical ancestors + of a path. Don't try to construct it yourself.""" + __slots__ = ('_path', '_drv', '_root', '_tail') + + def __init__(self, path): + self._path = path + self._drv = path.drive + self._root = path.root + self._tail = path._tail + + def __len__(self): + return len(self._tail) + + def __getitem__(self, idx): + if isinstance(idx, slice): + return tuple(self[i] for i in range(*idx.indices(len(self)))) + + if idx >= len(self) or idx < -len(self): + raise IndexError(idx) + if idx < 0: + idx += len(self) + return self._path._from_parsed_parts(self._drv, self._root, + self._tail[:-idx - 1]) + + def __repr__(self): + return "<{}.parents>".format(type(self._path).__name__) + + UnsupportedOperation = _abc.UnsupportedOperation @@ -164,6 +194,25 @@ def __ge__(self, other): return NotImplemented return self._parts_normcase >= other._parts_normcase + @property + def parent(self): + """The logical parent of the path.""" + drv = self.drive + root = self.root + tail = self._tail + if not tail: + return self + path = self._from_parsed_parts(drv, root, 
tail[:-1]) + path._resolving = self._resolving + return path + + @property + def parents(self): + """A sequence of this path's logical parents.""" + # The value of this property should not be cached on the path object, + # as doing so would introduce a reference cycle. + return _PathParents(self) + def as_uri(self): """Return the path as a URI.""" if not self.is_absolute(): diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index efe56ec565c162..4e237246045e69 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -3,7 +3,6 @@ import posixpath import sys import warnings -from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from itertools import chain from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -140,35 +139,6 @@ class UnsupportedOperation(NotImplementedError): pass -class _PathParents(Sequence): - """This object provides sequence-like access to the logical ancestors - of a path. Don't try to construct it yourself.""" - __slots__ = ('_path', '_drv', '_root', '_tail') - - def __init__(self, path): - self._path = path - self._drv = path.drive - self._root = path.root - self._tail = path._tail - - def __len__(self): - return len(self._tail) - - def __getitem__(self, idx): - if isinstance(idx, slice): - return tuple(self[i] for i in range(*idx.indices(len(self)))) - - if idx >= len(self) or idx < -len(self): - raise IndexError(idx) - if idx < 0: - idx += len(self) - return self._path._from_parsed_parts(self._drv, self._root, - self._tail[:-idx - 1]) - - def __repr__(self): - return "<{}.parents>".format(type(self._path).__name__) - - class PurePathBase: """Base class for pure path objects. 
@@ -457,21 +427,23 @@ def __rtruediv__(self, key): @property def parent(self): """The logical parent of the path.""" - drv = self.drive - root = self.root - tail = self._tail - if not tail: - return self - path = self._from_parsed_parts(drv, root, tail[:-1]) - path._resolving = self._resolving - return path + path, name = self.pathmod.split(str(self)) + if name and name != '.': + path = self.with_segments(path) + path._resolving = self._resolving + return path + return self @property def parents(self): """A sequence of this path's logical parents.""" - # The value of this property should not be cached on the path object, - # as doing so would introduce a reference cycle. - return _PathParents(self) + split = self.pathmod.split + paths = [] + path, name = split(str(self)) + while name and name != '.': + paths.append(self.with_segments(path)) + path, name = split(path) + return tuple(paths) def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, From f93327054964e985c3bcf2e017645851589a4ec2 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 28 Dec 2023 02:21:20 +0000 Subject: [PATCH 2/5] Modify `PathBase._split_stack()` along similar lines. --- Lib/pathlib/_abc.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 4e237246045e69..027c535a71d2c5 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -951,9 +951,15 @@ def _split_stack(self): uppermost parent of the path (equivalent to path.parents[-1]), and *parts* is a reversed list of parts following the anchor. 
""" - if not self._tail: + split = self.pathmod.split + names = [] + path, name = split(str(self)) + while name and name != '.': + names.append(name) + path, name = split(path) + if not names: return self, [] - return self._from_parsed_parts(self.drive, self.root, []), self._tail[::-1] + return self.with_segments(path), names def resolve(self, strict=False): """ From 2f6354a9e66ca3d58520bde004df49ed2b34d155 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 28 Dec 2023 03:19:49 +0000 Subject: [PATCH 3/5] Make `_resolving` and ABC-only thing. --- Lib/pathlib/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 329cb8a45f29cd..6e86249f6c7145 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -123,7 +123,6 @@ def __init__(self, *args): paths.append(path) # Avoid calling super().__init__, as an optimisation self._raw_paths = paths - self._resolving = False def __reduce__(self): # Using the parts tuple helps share interned path parts @@ -202,9 +201,7 @@ def parent(self): tail = self._tail if not tail: return self - path = self._from_parsed_parts(drv, root, tail[:-1]) - path._resolving = self._resolving - return path + return self._from_parsed_parts(drv, root, tail[:-1]) @property def parents(self): From e4c490ac2a73d9c31c159e7037760943da65b65f Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 Jan 2024 22:57:11 +0000 Subject: [PATCH 4/5] Rely less on pathlib normalization details. 
--- Lib/pathlib/_abc.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 027c535a71d2c5..f67780e540b1a2 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -427,23 +427,26 @@ def __rtruediv__(self, key): @property def parent(self): """The logical parent of the path.""" - path, name = self.pathmod.split(str(self)) - if name and name != '.': - path = self.with_segments(path) - path._resolving = self._resolving - return path + path = str(self) + parent = self.pathmod.dirname(path) + if path != parent: + parent = self.with_segments(parent) + parent._resolving = self._resolving + return parent return self @property def parents(self): """A sequence of this path's logical parents.""" - split = self.pathmod.split - paths = [] - path, name = split(str(self)) - while name and name != '.': - paths.append(self.with_segments(path)) - path, name = split(path) - return tuple(paths) + dirname = self.pathmod.dirname + path = str(self) + parent = dirname(path) + parents = [] + while path != parent: + parents.append(self.with_segments(parent)) + path = parent + parent = dirname(path) + return tuple(parents) def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, @@ -952,11 +955,13 @@ def _split_stack(self): *parts* is a reversed list of parts following the anchor. """ split = self.pathmod.split + path = str(self) + parent, name = split(path) names = [] - path, name = split(str(self)) - while name and name != '.': + while path != parent: names.append(name) - path, name = split(path) + path = parent + parent, name = split(path) if not names: return self, [] return self.with_segments(path), names @@ -980,7 +985,9 @@ def resolve(self, strict=False): link_count = 0 while parts: part = parts.pop() - if part == '..': + if not part or part == '.': + continue + elif part == '..': if not path._tail: if path.root: # Delete '..' 
segment immediately following root From 1545411b0e0231d6f50aab34857ede33392f4167 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 Jan 2024 18:41:59 +0000 Subject: [PATCH 5/5] Undo `resolve()` changes. --- Lib/pathlib/_abc.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 9805c51046aa7e..5dcea7ead8ab3c 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -931,17 +931,9 @@ def _split_stack(self): uppermost parent of the path (equivalent to path.parents[-1]), and *parts* is a reversed list of parts following the anchor. """ - split = self.pathmod.split - path = str(self) - parent, name = split(path) - names = [] - while path != parent: - names.append(name) - path = parent - parent, name = split(path) - if not names: + if not self._tail: return self, [] - return self.with_segments(path), names + return self._from_parsed_parts(self.drive, self.root, []), self._tail[::-1] def resolve(self, strict=False): """ @@ -962,9 +954,7 @@ def resolve(self, strict=False): link_count = 0 while parts: part = parts.pop() - if not part or part == '.': - continue - elif part == '..': + if part == '..': if not path._tail: if path.root: # Delete '..' segment immediately following root