From c5277b7549f6451cb0de79ac7d60770ad3deb67c Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Fri, 3 Feb 2023 20:26:37 -0800 Subject: [PATCH 01/11] gh-84559: Change the default multiprocessing start method. We drop 'fork' in favor of 'forkserver' or 'spawn'. See the issue for details. --- Lib/multiprocessing/context.py | 19 +++++++++++-------- Lib/test/_test_multiprocessing.py | 21 ++++++++++++++++----- Lib/test/support/__init__.py | 8 +++++++- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/Lib/multiprocessing/context.py b/Lib/multiprocessing/context.py index de8a264829dff3..f501fda602e098 100644 --- a/Lib/multiprocessing/context.py +++ b/Lib/multiprocessing/context.py @@ -259,13 +259,12 @@ def get_start_method(self, allow_none=False): def get_all_start_methods(self): """Returns a list of the supported start methods, default first.""" - if sys.platform == 'win32': - return ['spawn'] - else: - methods = ['spawn', 'fork'] if sys.platform == 'darwin' else ['fork', 'spawn'] - if reduction.HAVE_SEND_HANDLE: - methods.append('forkserver') - return methods + default = self._default_context.get_start_method() + start_method_names = [default] + start_method_names += ( + name for name in _concrete_contexts if name != default + ) + return start_method_names # @@ -325,7 +324,11 @@ def _check_available(self): # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. _default_context = DefaultContext(_concrete_contexts['spawn']) else: - _default_context = DefaultContext(_concrete_contexts['fork']) + # gh-84559: We changed the default to a thread safe one in 3.14. 
+ if reduction.HAVE_SEND_HANDLE: + _default_context = DefaultContext(_concrete_contexts['forkserver']) + else: + _default_context = DefaultContext(_concrete_contexts['spawn']) else: diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 9a2db24b4bd597..24ff8f79becce5 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -5261,15 +5261,26 @@ def test_set_get(self): multiprocessing.set_start_method(old_method, force=True) self.assertGreaterEqual(count, 1) - def test_get_all(self): + def test_get_all_start_methods(self): methods = multiprocessing.get_all_start_methods() + self.assertIn('spawn', methods) if sys.platform == 'win32': self.assertEqual(methods, ['spawn']) + if sys.platform == 'darwin': + self.assertEqual(methods[0], 'spawn') # The default is first. + # Whether these work or not, they remain available on macOS. + self.assertIn('fork', methods) + self.assertIn('forkserver', methods) else: - self.assertTrue(methods == ['fork', 'spawn'] or - methods == ['spawn', 'fork'] or - methods == ['fork', 'spawn', 'forkserver'] or - methods == ['spawn', 'fork', 'forkserver']) + # POSIX + self.assertIn('fork', methods) + if other_methods := set(methods) - {'fork', 'spawn'}: + self.assertEqual({'forkserver'}, other_methods) + # >=3.14 Defaults to forkserver if the platform supports it. 
+ self.assertIn(methods[0], {'forkserver', 'spawn'}, + msg='3.14+ default must not be fork') + if methods[0] == 'spawn': + self.assertNotIn('forkserver', methods) def test_preload_resources(self): if multiprocessing.get_start_method() != 'forkserver': diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 4a22ccdd4db403..370edc778497d8 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2161,7 +2161,13 @@ def skip_if_broken_multiprocessing_synchronize(): # bpo-38377: On Linux, creating a semaphore fails with OSError # if the current user does not have the permission to create # a file in /dev/shm/ directory. - synchronize.Lock(ctx=None) + import multiprocessing + synchronize.Lock(ctx=multiprocessing.get_context('fork')) + # The explicit fork mp context is required as relying on the + # default breaks TestResourceTracker.test_resource_tracker_reused + # when the default start method is not fork as synchronize creates + # a new multiprocessing.resource_tracker process at module import + # time via the aboe call in that scenario. This enables gh-84559. 
except OSError as exc: raise unittest.SkipTest(f"broken multiprocessing SemLock: {exc!r}") From 7ab7ebc8a32766730ff0f135707f29560a914919 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 19 Sep 2024 00:09:55 +0000 Subject: [PATCH 02/11] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst diff --git a/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst new file mode 100644 index 00000000000000..883e4105ef6f3b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst @@ -0,0 +1 @@ +The default :mod:`multiprocessing` start method on Linux and other POSIX systems has been changed away from often unsafe``"fork"`` to ``"forkserver"`` (when the platform support sending file handles over pipes) or ``"spawn"``. Mac and Windows are unchanged as they already default to ``"spawn"``. From f09cd81f5591a1749c564dddb6e6f32cd02f0544 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 24 Sep 2024 21:14:18 +0000 Subject: [PATCH 03/11] Update module docs. --- Doc/library/concurrent.futures.rst | 14 ++++++-------- Doc/library/multiprocessing.rst | 21 +++++++++++++++------ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index e3b24451188cc4..ce72127127c7a6 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -286,14 +286,6 @@ to a :class:`ProcessPoolExecutor` will result in deadlock. Added the *initializer* and *initargs* arguments. - .. 
note:: - The default :mod:`multiprocessing` start method - (see :ref:`multiprocessing-start-methods`) will change away from - *fork* in Python 3.14. Code that requires *fork* be used for their - :class:`ProcessPoolExecutor` should explicitly specify that by - passing a ``mp_context=multiprocessing.get_context("fork")`` - parameter. - .. versionchanged:: 3.11 The *max_tasks_per_child* argument was added to allow users to control the lifetime of workers in the pool. @@ -310,6 +302,12 @@ to a :class:`ProcessPoolExecutor` will result in deadlock. *max_workers* uses :func:`os.process_cpu_count` by default, instead of :func:`os.cpu_count`. + .. versionchanged:: 3.14 + The default process start method (see + :ref:`multiprocessing-start-methods`) changed away from *fork*. If you + require the *fork* start method for :class:`ProcessPoolExecutor` you must + explicitly pass ``mp_context=multiprocessing.get_context("fork")``. + .. _processpoolexecutor-example: ProcessPoolExecutor Example diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 80d6e4dae24463..f054e5066f7158 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -124,11 +124,11 @@ to start a process. These *start methods* are inherited by the child process. Note that safely forking a multithreaded process is problematic. - Available on POSIX systems. Currently the default on POSIX except macOS. + Available on POSIX systems. - .. note:: - The default start method will change away from *fork* in Python 3.14. - Code that requires *fork* should explicitly specify that via + .. versionchanged:: 3.14 + This is no longer the default start method on any platform. + Code that requires *fork* must explicitly specify that via :func:`get_context` or :func:`set_start_method`. .. versionchanged:: 3.12 @@ -146,9 +146,11 @@ to start a process. These *start methods* are side-effect so it is generally safe for it to use :func:`os.fork`. 
No unnecessary resources are inherited. - Available on POSIX platforms which support passing file descriptors - over Unix pipes such as Linux. + Available on POSIX platforms which support passing file descriptors over + Unix pipes such as Linux. The default on those platforms. + .. versionchanged:: 3.14 + This became the default start method on POSIX platforms. .. versionchanged:: 3.4 *spawn* added on all POSIX platforms, and *forkserver* added for @@ -162,6 +164,13 @@ to start a process. These *start methods* are method should be considered unsafe as it can lead to crashes of the subprocess as macOS system libraries may start threads. See :issue:`33725`. +.. versionchanged:: 3.14 + + On POSIX platforms the default start method was changed from *fork* to + *forkserver* to retain the performance but avoid common multithreaded + process incompatibilities. See :issue:`84559`. + + On POSIX using the *spawn* or *forkserver* start methods will also start a *resource tracker* process which tracks the unlinked named system resources (such as named semaphores or From a3396624f0844f703f088808e27d49d845eaf486 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 24 Sep 2024 21:23:48 +0000 Subject: [PATCH 04/11] What's New + issue ref fix. --- Doc/library/multiprocessing.rst | 2 +- Doc/whatsnew/3.14.rst | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index f054e5066f7158..036b8f44b9ff3b 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -168,7 +168,7 @@ to start a process. These *start methods* are On POSIX platforms the default start method was changed from *fork* to *forkserver* to retain the performance but avoid common multithreaded - process incompatibilities. See :issue:`84559`. + process incompatibilities. See :gh:`84559`.
On POSIX using the *spawn* or *forkserver* start methods will also diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5acb9bfe18b2d0..366c072eb1863f 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -367,6 +367,14 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`multiprocessing` and :mod:`concurrent.futures`: + The default start method (see :ref:`multiprocessing-start-methods`) changed + away from *fork* to *forkserver* on platforms where it was not already + *spawn* (Windows & macOS). If you require the threading incompatible *fork* + start method you must explicitly specify it when using :mod:`multiprocessing` + or :mod:`concurrent.futures` APIs. + (Contributed by Gregory P. Smith in :gh:`84559`.) + * :mod:`os`: :term:`Soft deprecate ` :func:`os.popen` and :func:`os.spawn* ` functions. They should no longer be used to From 21f828f559ba9f708af0f0f735b29366785e901a Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 26 Sep 2024 05:15:12 +0000 Subject: [PATCH 05/11] elif vs if makes a difference... --- Lib/test/_test_multiprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index db30f1fc6ce70a..46fc0e53626998 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -5558,7 +5558,7 @@ def test_get_all_start_methods(self): self.assertIn('spawn', methods) if sys.platform == 'win32': self.assertEqual(methods, ['spawn']) - if sys.platform == 'darwin': + elif sys.platform == 'darwin': self.assertEqual(methods[0], 'spawn') # The default is first. # Whether these work or not, they remain available on macOS. self.assertIn('fork', methods) From 823ec3507f6a6415fa66b5e57b28ebb654b606cd Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google LLC]" Date: Thu, 26 Sep 2024 05:15:28 +0000 Subject: [PATCH 06/11] simplify the default selection logic. --- Lib/multiprocessing/context.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Lib/multiprocessing/context.py b/Lib/multiprocessing/context.py index 02e5cbf6429685..d0a3ad00e53ad8 100644 --- a/Lib/multiprocessing/context.py +++ b/Lib/multiprocessing/context.py @@ -261,8 +261,8 @@ def get_all_start_methods(self): """Returns a list of the supported start methods, default first.""" default = self._default_context.get_start_method() start_method_names = [default] - start_method_names += ( - name for name in _concrete_contexts if name != default + start_method_names.extend( + name for name in _concrete_contexts if name != default ) return start_method_names @@ -319,18 +319,15 @@ def _check_available(self): 'spawn': SpawnContext(), 'forkserver': ForkServerContext(), } - if sys.platform == 'darwin': - # bpo-33725: running arbitrary code after fork() is no longer reliable - # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. - _default_context = DefaultContext(_concrete_contexts['spawn']) + # bpo-33725: running arbitrary code after fork() is no longer reliable + # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. + # gh-84559: We changed everyone's default to a thread-safe-ish one in 3.14. + if reduction.HAVE_SEND_HANDLE and sys.platform != 'darwin': + _default_context = DefaultContext(_concrete_contexts['forkserver']) else: - # gh-84559: We changed the default to a thread safe one in 3.14.
- if reduction.HAVE_SEND_HANDLE: - _default_context = DefaultContext(_concrete_contexts['forkserver']) - else: - _default_context = DefaultContext(_concrete_contexts['spawn']) + _default_context = DefaultContext(_concrete_contexts['spawn']) -else: +else: # Windows class SpawnProcess(process.BaseProcess): _start_method = 'spawn' From 7fbf3506ddc08bc71cf56729ab1811fa8c7fcc58 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 26 Sep 2024 05:17:13 +0000 Subject: [PATCH 07/11] fix comment typo --- Lib/test/support/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index b0a53bf829d2a8..5d67c97f53aee6 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2215,7 +2215,7 @@ def skip_if_broken_multiprocessing_synchronize(): # default breaks TestResourceTracker.test_resource_tracker_reused # when the default start method is not fork as synchronize creates # a new multiprocessing.resource_tracker process at module import - # time via the aboe call in that scenario. This enables gh-84559. + # time via the above call in that scenario. This enables gh-84559. except OSError as exc: raise unittest.SkipTest(f"broken multiprocessing SemLock: {exc!r}") From ea574cc7956fb2ecee11ea62ec366a8dfb16cb83 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Wed, 25 Sep 2024 22:18:53 -0700 Subject: [PATCH 08/11] typo fix in NEWS Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .../next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst index 883e4105ef6f3b..a505ffad123700 100644 --- a/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst +++ b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst @@ -1 +1 @@ -The default :mod:`multiprocessing` start method on Linux and other POSIX systems has been changed away from often unsafe``"fork"`` to ``"forkserver"`` (when the platform support sending file handles over pipes) or ``"spawn"``. Mac and Windows are unchanged as they already default to ``"spawn"``. +The default :mod:`multiprocessing` start method on Linux and other POSIX systems has been changed away from often unsafe ``"fork"`` to ``"forkserver"`` (when the platform support sending file handles over pipes) or ``"spawn"``. Mac and Windows are unchanged as they already default to ``"spawn"``. From 449257476509b6eb5dcbf88251846806a9e61eb7 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 26 Sep 2024 05:25:45 +0000 Subject: [PATCH 09/11] Better explain test logic with comments. --- Lib/test/_test_multiprocessing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 46fc0e53626998..a059a6b8340448 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -5567,11 +5567,14 @@ def test_get_all_start_methods(self): # POSIX self.assertIn('fork', methods) if other_methods := set(methods) - {'fork', 'spawn'}: + # If there are more than those two, forkserver must be one. 
self.assertEqual({'forkserver'}, other_methods) - # >=3.14 Defaults to forkserver if the platform supports it. + # The default is the first method in the list. self.assertIn(methods[0], {'forkserver', 'spawn'}, msg='3.14+ default must not be fork') if methods[0] == 'spawn': + # Confirm that the current default selection logic prefers + # forkserver vs spawn when available. self.assertNotIn('forkserver', methods) def test_preload_resources(self): From 0889ab550d14d5ee514ffc3748e7b43af5bd1229 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 26 Sep 2024 05:31:18 +0000 Subject: [PATCH 10/11] Make an explanatory comment more clear. --- Lib/test/support/__init__.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5d67c97f53aee6..99cb10fc7b5f7b 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2211,11 +2211,13 @@ def skip_if_broken_multiprocessing_synchronize(): # a file in /dev/shm/ directory. import multiprocessing synchronize.Lock(ctx=multiprocessing.get_context('fork')) - # The explicit fork mp context is required as relying on the - # default breaks TestResourceTracker.test_resource_tracker_reused - # when the default start method is not fork as synchronize creates - # a new multiprocessing.resource_tracker process at module import - # time via the above call in that scenario. This enables gh-84559. + # The explicit fork mp context is required in order for + # TestResourceTracker.test_resource_tracker_reused to work. + # synchronize creates a new multiprocessing.resource_tracker + # process at module import time via the above call in that + # scenario. Awkward. This enables gh-84559. No code involved + # should have threads at that point so fork() should be safe. 
+ except OSError as exc: raise unittest.SkipTest(f"broken multiprocessing SemLock: {exc!r}") From 668559cac3265af70abf920c7a93b5aba16ab7d5 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 26 Sep 2024 05:33:43 +0000 Subject: [PATCH 11/11] minor NEWS wording improvement. line length. --- .../Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst index a505ffad123700..a4428e20f3ccdd 100644 --- a/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst +++ b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst @@ -1 +1,5 @@ -The default :mod:`multiprocessing` start method on Linux and other POSIX systems has been changed away from often unsafe ``"fork"`` to ``"forkserver"`` (when the platform support sending file handles over pipes) or ``"spawn"``. Mac and Windows are unchanged as they already default to ``"spawn"``. +The default :mod:`multiprocessing` start method on Linux and other POSIX +systems has been changed away from often unsafe ``"fork"`` to ``"forkserver"`` +(when the platform supports sending file handles over pipes as most do) or +``"spawn"``. macOS and Windows are unchanged as they already default to +``"spawn"``.