From bedf76e95d4609af3bf6df92ee7f54b30f36eebb Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 Jul 2024 15:33:19 +0100 Subject: [PATCH 1/2] GH-119169: Speed up `os.fwalk(topdown=False)` Add entries to the stack while iterating over `os.scandir()` results, rather than afterwards. This removes the need for an `entries` list and some zipping. --- Lib/os.py | 23 ++++++++----------- ...-07-06-15-31-01.gh-issue-119169.bfpdsr.rst | 1 + 2 files changed, 11 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-06-15-31-01.gh-issue-119169.bfpdsr.rst diff --git a/Lib/os.py b/Lib/os.py index 4b48afb040e565..e7a21079b4f0f2 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -542,7 +542,10 @@ def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks): scandir_it = scandir(topfd) dirs = [] nondirs = [] - entries = None if topdown or follow_symlinks else [] + topprefix = path.join(toppath, toppath[:0]) # Add trailing slash. + if not topdown: + # Yield after sub-directory traversal if going bottom up. + stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd))) for entry in scandir_it: name = entry.name if isbytes: @@ -550,8 +553,11 @@ def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks): try: if entry.is_dir(): dirs.append(name) - if entries is not None: - entries.append(entry) + if not topdown: + stack.append( + (_fwalk_walk, ( + False, topfd, topprefix + name, name, + None if follow_symlinks else entry))) else: nondirs.append(name) except OSError: @@ -564,18 +570,9 @@ def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks): if topdown: yield toppath, dirs, nondirs, topfd - else: - stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd))) - - toppath = path.join(toppath, toppath[:0]) # Add trailing slash. - if entries is None: stack.extend( - (_fwalk_walk, (False, topfd, toppath + name, name, None)) + (_fwalk_walk, (False, topfd, topprefix + name, name, None)) for name in dirs[::-1]) - else: - stack.extend( - (_fwalk_walk, (False, topfd, toppath + name, name, entry)) - for name, entry in zip(dirs[::-1], entries[::-1])) __all__.append("fwalk") diff --git a/Misc/NEWS.d/next/Library/2024-07-06-15-31-01.gh-issue-119169.bfpdsr.rst b/Misc/NEWS.d/next/Library/2024-07-06-15-31-01.gh-issue-119169.bfpdsr.rst new file mode 100644 index 00000000000000..38d35871a1407a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-06-15-31-01.gh-issue-119169.bfpdsr.rst @@ -0,0 +1 @@ +Speed up :func:`os.fwalk` in bottom-up mode. From 53255075a120ecc9e1a440e6873ac680eecc6217 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 6 Jul 2024 15:35:09 +0100 Subject: [PATCH 2/2] Extraneous comment. --- Lib/os.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/os.py b/Lib/os.py index e7a21079b4f0f2..4ee87765ab3976 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -544,7 +544,6 @@ def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks): nondirs = [] topprefix = path.join(toppath, toppath[:0]) # Add trailing slash. if not topdown: - # Yield after sub-directory traversal if going bottom up. stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd))) for entry in scandir_it: name = entry.name