From bf8da8655801d4f869b38e667c68cab28f9c9562 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 Mar 2025 00:23:52 +0000 Subject: [PATCH 1/3] GH-130614: pathlib ABCs: improve support for receiving path metadata In the private pathlib ABCs, replace `_WritablePath._write_info()` with `_WritablePath._copy_from()`. This provides the target path object with more control over the copying process, including support for querying and setting metadata *before* the path is created. Adjust `_ReadablePath.copy()` so that it forwards its keyword arguments to `_WritablePath._copy_from()` of the target path object. This allows us to remove the unimplemented *preserve_metadata* argument in the ABC method, making it a `Path` exclusive. --- Lib/pathlib/__init__.py | 74 +++++++++++++++++++++++++++++++++-------- Lib/pathlib/_os.py | 42 ++--------------------- Lib/pathlib/types.py | 33 ++++++++++++------ 3 files changed, 87 insertions(+), 62 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 338016dbbbe972..c272ac4dddd40d 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -29,7 +29,7 @@ from pathlib._os import ( PathInfo, DirEntryInfo, ensure_different_files, ensure_distinct_paths, - copy_file, copy_info, + copyfile2, copyfileobj, magic_open, copy_info, ) @@ -810,12 +810,6 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) - def _write_info(self, info, follow_symlinks=True): - """ - Write the given PathInfo to this path. - """ - copy_info(info, self, follow_symlinks=follow_symlinks) - _remove_leading_dot = operator.itemgetter(slice(2, None)) _remove_trailing_slash = operator.itemgetter(slice(-1)) @@ -1100,18 +1094,21 @@ def replace(self, target): target = self.with_segments(target) return target - def copy(self, target, follow_symlinks=True, preserve_metadata=False): + def copy(self, target, **kwargs): """ Recursively copy this file or directory tree to the given destination. """ if not hasattr(target, 'with_segments'): target = self.with_segments(target) ensure_distinct_paths(self, target) - copy_file(self, target, follow_symlinks, preserve_metadata) + try: + copy_to_target = target._copy_from + except AttributeError: + raise TypeError(f"Target path is not writable: {target!r}") from None + copy_to_target(self, **kwargs) return target.joinpath() # Empty join to ensure fresh metadata. - def copy_into(self, target_dir, *, follow_symlinks=True, - preserve_metadata=False): + def copy_into(self, target_dir, **kwargs): """ Copy this file or directory tree into the given existing directory. """ @@ -1122,8 +1119,59 @@ def copy_into(self, target_dir, *, follow_symlinks=True, target = target_dir / name else: target = self.with_segments(target_dir, name) - return self.copy(target, follow_symlinks=follow_symlinks, - preserve_metadata=preserve_metadata) + return self.copy(target, **kwargs) + + def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False): + """ + Recursively copy the given path to this path. + """ + if not follow_symlinks and source.info.is_symlink(): + self._copy_from_symlink(source, preserve_metadata) + elif source.info.is_dir(): + children = source.iterdir() + os.mkdir(self) + for child in children: + self.joinpath(child.name)._copy_from( + child, follow_symlinks, preserve_metadata) + if preserve_metadata: + copy_info(source.info, self) + else: + self._copy_from_file(source, preserve_metadata) + + def _copy_from_file(self, source, preserve_metadata=False): + ensure_different_files(source, self) + with magic_open(source, 'rb') as source_f: + with open(self, 'wb') as target_f: + copyfileobj(source_f, target_f) + if preserve_metadata: + copy_info(source.info, self) + + if copyfile2: + # Use fast OS routine for local file copying where available. + _copy_from_file_fallback = _copy_from_file + def _copy_from_file(self, source, preserve_metadata=False): + try: + source = os.fspath(source) + except TypeError: + pass + else: + copyfile2(source, str(self)) + return + self._copy_from_file_fallback(source, preserve_metadata) + + if os.name == 'nt': + # If a directory-symlink is copied *before* its target, then + # os.symlink() incorrectly creates a file-symlink on Windows. Avoid + # this by passing *target_is_dir* to os.symlink() on Windows. + def _copy_from_symlink(self, source, preserve_metadata=False): + os.symlink(str(source.readlink()), self, source.info.is_dir()) + if preserve_metadata: + copy_info(source.info, self, follow_symlinks=False) + else: + def _copy_from_symlink(self, source, preserve_metadata=False): + os.symlink(str(source.readlink()), self) + if preserve_metadata: + copy_info(source.info, self, follow_symlinks=False) def move(self, target): """ diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index 121b6d656a8f53..ee8657f427efbd 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -102,16 +102,16 @@ def _sendfile(source_fd, target_fd): if _winapi and hasattr(_winapi, 'CopyFile2'): - def _copyfile2(source, target): + def copyfile2(source, target): """ Copy from one file to another using CopyFile2 (Windows only). """ _winapi.CopyFile2(source, target, 0) else: - _copyfile2 = None + copyfile2 = None -def _copyfileobj(source_f, target_f): +def copyfileobj(source_f, target_f): """ Copy data from file-like object source_f to file-like object target_f. """ @@ -242,42 +242,6 @@ def ensure_different_files(source, target): raise err -def copy_file(source, target, follow_symlinks=True, preserve_metadata=False): - """ - Recursively copy the given source ReadablePath to the given target WritablePath. - """ - info = source.info - if not follow_symlinks and info.is_symlink(): - target.symlink_to(str(source.readlink()), info.is_dir()) - if preserve_metadata: - target._write_info(info, follow_symlinks=False) - elif info.is_dir(): - children = source.iterdir() - target.mkdir() - for src in children: - dst = target.joinpath(src.name) - copy_file(src, dst, follow_symlinks, preserve_metadata) - if preserve_metadata: - target._write_info(info) - else: - if _copyfile2: - # Use fast OS routine for local file copying where available. - try: - source_p = os.fspath(source) - target_p = os.fspath(target) - except TypeError: - pass - else: - _copyfile2(source_p, target_p) - return - ensure_different_files(source, target) - with magic_open(source, 'rb') as source_f: - with magic_open(target, 'wb') as target_f: - _copyfileobj(source_f, target_f) - if preserve_metadata: - target._write_info(info) - - def copy_info(info, target, follow_symlinks=True): """Copy metadata from the given PathInfo to the given local path.""" copy_times_ns = ( diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py index 9852bd4ff1e997..021ea51a2ee5dd 100644 --- a/Lib/pathlib/types.py +++ b/Lib/pathlib/types.py @@ -13,7 +13,7 @@ from abc import ABC, abstractmethod from glob import _PathGlobber from pathlib import PurePath, Path -from pathlib._os import magic_open, ensure_distinct_paths, copy_file +from pathlib._os import magic_open, ensure_distinct_paths, ensure_different_files, copyfileobj from typing import Optional, Protocol, runtime_checkable @@ -332,18 +332,21 @@ def readlink(self): """ raise NotImplementedError - def copy(self, target, follow_symlinks=True, preserve_metadata=False): + def copy(self, target, **kwargs): """ Recursively copy this file or directory tree to the given destination. """ if not hasattr(target, 'with_segments'): target = self.with_segments(target) ensure_distinct_paths(self, target) - copy_file(self, target, follow_symlinks, preserve_metadata) + try: + copy_to_target = target._copy_from + except AttributeError: + raise TypeError(f"Target path is not writable: {target!r}") from None + copy_to_target(self, **kwargs) return target.joinpath() # Empty join to ensure fresh metadata. - def copy_into(self, target_dir, *, follow_symlinks=True, - preserve_metadata=False): + def copy_into(self, target_dir, **kwargs): """ Copy this file or directory tree into the given existing directory. """ @@ -354,8 +357,7 @@ def copy_into(self, target_dir, *, follow_symlinks=True, target = target_dir / name else: target = self.with_segments(target_dir, name) - return self.copy(target, follow_symlinks=follow_symlinks, - preserve_metadata=preserve_metadata) + return self.copy(target, **kwargs) class _WritablePath(_JoinablePath): @@ -409,11 +411,22 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with magic_open(self, mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) - def _write_info(self, info, follow_symlinks=True): + def _copy_from(self, source, follow_symlinks=True): """ - Write the given PathInfo to this path. + Recursively copy the given path to this path. """ - pass + if not follow_symlinks and source.info.is_symlink(): + self.symlink_to(str(source.readlink()), source.info.is_dir()) + elif source.info.is_dir(): + children = source.iterdir() + self.mkdir() + for child in children: + self.joinpath(child.name)._copy_from(child, follow_symlinks) + else: + ensure_different_files(source, self) + with magic_open(source, 'rb') as source_f: + with magic_open(self, 'wb') as target_f: + copyfileobj(source_f, target_f) _JoinablePath.register(PurePath) From 119822c9d408dc5d47fac784c146197612e6994d Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 14 Mar 2025 19:59:47 +0000 Subject: [PATCH 2/3] Use a stack to implement `_WritablePath._copy_from()` --- Lib/pathlib/types.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py index 021ea51a2ee5dd..9b1755ddaa1dc0 100644 --- a/Lib/pathlib/types.py +++ b/Lib/pathlib/types.py @@ -415,18 +415,21 @@ def _copy_from(self, source, follow_symlinks=True): """ Recursively copy the given path to this path. """ - if not follow_symlinks and source.info.is_symlink(): - self.symlink_to(str(source.readlink()), source.info.is_dir()) - elif source.info.is_dir(): - children = source.iterdir() - self.mkdir() - for child in children: - self.joinpath(child.name)._copy_from(child, follow_symlinks) - else: - ensure_different_files(source, self) - with magic_open(source, 'rb') as source_f: - with magic_open(self, 'wb') as target_f: - copyfileobj(source_f, target_f) + stack = [(source, self)] + while stack: + src, dst = stack.pop() + if not follow_symlinks and src.info.is_symlink(): + dst.symlink_to(str(src.readlink()), src.info.is_dir()) + elif src.info.is_dir(): + children = src.iterdir() + dst.mkdir() + for child in children: + stack.append((child, dst.joinpath(child.name))) + else: + ensure_different_files(src, dst) + with magic_open(src, 'rb') as source_f: + with magic_open(dst, 'wb') as target_f: + copyfileobj(source_f, target_f) _JoinablePath.register(PurePath) From b5ed8a1fe772ba37c3b3eefbb6b1395ca3590ad9 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 15 Mar 2025 11:01:15 +0000 Subject: [PATCH 3/3] Swap import order to make backporting a tiny bit easier. --- Lib/pathlib/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib/types.py b/Lib/pathlib/types.py index 9b1755ddaa1dc0..85dd9e5b2d6b9a 100644 --- a/Lib/pathlib/types.py +++ b/Lib/pathlib/types.py @@ -12,8 +12,8 @@ from abc import ABC, abstractmethod from glob import _PathGlobber -from pathlib import PurePath, Path from pathlib._os import magic_open, ensure_distinct_paths, ensure_different_files, copyfileobj +from pathlib import PurePath, Path from typing import Optional, Protocol, runtime_checkable