Skip to content

Commit 200271c

Browse files
authored
gh-114763: Protect lazy loading modules from attribute access races (GH-114781)
Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock.
1 parent ef6074b commit 200271c

File tree

3 files changed

+94
-32
lines changed

3 files changed

+94
-32
lines changed

Lib/importlib/util.py

+51-30
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import _imp
1515
import sys
16+
import threading
1617
import types
1718

1819

@@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):
171172

172173
def __getattribute__(self, attr):
173174
"""Trigger the load of the module and return the attribute."""
174-
# All module metadata must be garnered from __spec__ in order to avoid
175-
# using mutated values.
176-
# Stop triggering this method.
177-
self.__class__ = types.ModuleType
178-
# Get the original name to make sure no object substitution occurred
179-
# in sys.modules.
180-
original_name = self.__spec__.name
181-
# Figure out exactly what attributes were mutated between the creation
182-
# of the module and now.
183-
attrs_then = self.__spec__.loader_state['__dict__']
184-
attrs_now = self.__dict__
185-
attrs_updated = {}
186-
for key, value in attrs_now.items():
187-
# Code that set the attribute may have kept a reference to the
188-
# assigned object, making identity more important than equality.
189-
if key not in attrs_then:
190-
attrs_updated[key] = value
191-
elif id(attrs_now[key]) != id(attrs_then[key]):
192-
attrs_updated[key] = value
193-
self.__spec__.loader.exec_module(self)
194-
# If exec_module() was used directly there is no guarantee the module
195-
# object was put into sys.modules.
196-
if original_name in sys.modules:
197-
if id(self) != id(sys.modules[original_name]):
198-
raise ValueError(f"module object for {original_name!r} "
199-
"substituted in sys.modules during a lazy "
200-
"load")
201-
# Update after loading since that's what would happen in an eager
202-
# loading situation.
203-
self.__dict__.update(attrs_updated)
175+
__spec__ = object.__getattribute__(self, '__spec__')
176+
loader_state = __spec__.loader_state
177+
with loader_state['lock']:
178+
# Only the first thread to get the lock should trigger the load
179+
# and reset the module's class. The rest can now getattr().
180+
if object.__getattribute__(self, '__class__') is _LazyModule:
181+
# The first thread comes here multiple times as it descends the
182+
# call stack. The first time, it sets is_loading and triggers
183+
# exec_module(), which will access module.__dict__, module.__name__,
184+
# and/or module.__spec__, reentering this method. These accesses
185+
# need to be allowed to proceed without triggering the load again.
186+
if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
187+
return object.__getattribute__(self, attr)
188+
loader_state['is_loading'] = True
189+
190+
__dict__ = object.__getattribute__(self, '__dict__')
191+
192+
# All module metadata must be gathered from __spec__ in order to avoid
193+
# using mutated values.
194+
# Get the original name to make sure no object substitution occurred
195+
# in sys.modules.
196+
original_name = __spec__.name
197+
# Figure out exactly what attributes were mutated between the creation
198+
# of the module and now.
199+
attrs_then = loader_state['__dict__']
200+
attrs_now = __dict__
201+
attrs_updated = {}
202+
for key, value in attrs_now.items():
203+
# Code that set an attribute may have kept a reference to the
204+
# assigned object, making identity more important than equality.
205+
if key not in attrs_then:
206+
attrs_updated[key] = value
207+
elif id(attrs_now[key]) != id(attrs_then[key]):
208+
attrs_updated[key] = value
209+
__spec__.loader.exec_module(self)
210+
# If exec_module() was used directly there is no guarantee the module
211+
# object was put into sys.modules.
212+
if original_name in sys.modules:
213+
if id(self) != id(sys.modules[original_name]):
214+
raise ValueError(f"module object for {original_name!r} "
215+
"substituted in sys.modules during a lazy "
216+
"load")
217+
# Update after loading since that's what would happen in an eager
218+
# loading situation.
219+
__dict__.update(attrs_updated)
220+
# Finally, stop triggering this method.
221+
self.__class__ = types.ModuleType
222+
204223
return getattr(self, attr)
205224

206225
def __delattr__(self, attr):
@@ -244,5 +263,7 @@ def exec_module(self, module):
244263
loader_state = {}
245264
loader_state['__dict__'] = module.__dict__.copy()
246265
loader_state['__class__'] = module.__class__
266+
loader_state['lock'] = threading.RLock()
267+
loader_state['is_loading'] = False
247268
module.__spec__.loader_state = loader_state
248269
module.__class__ = _LazyModule

Lib/test/test_importlib/test_lazy.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
from importlib import abc
33
from importlib import util
44
import sys
5+
import time
6+
import threading
57
import types
68
import unittest
79

10+
from test.support import threading_helper
811
from test.test_importlib import util as test_util
912

1013

@@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
4043
module_name = 'lazy_loader_test'
4144
mutated_name = 'changed'
4245
loaded = None
46+
load_count = 0
4347
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
4448

4549
def find_spec(self, name, path, target=None):
@@ -48,8 +52,10 @@ def find_spec(self, name, path, target=None):
4852
return util.spec_from_loader(name, util.LazyLoader(self))
4953

5054
def exec_module(self, module):
55+
time.sleep(0.01) # Simulate a slow load.
5156
exec(self.source_code, module.__dict__)
5257
self.loaded = module
58+
self.load_count += 1
5359

5460

5561
class LazyLoaderTests(unittest.TestCase):
@@ -59,8 +65,9 @@ def test_init(self):
5965
# Classes that don't define exec_module() trigger TypeError.
6066
util.LazyLoader(object)
6167

62-
def new_module(self, source_code=None):
63-
loader = TestingImporter()
68+
def new_module(self, source_code=None, loader=None):
69+
if loader is None:
70+
loader = TestingImporter()
6471
if source_code is not None:
6572
loader.source_code = source_code
6673
spec = util.spec_from_loader(TestingImporter.module_name,
@@ -140,6 +147,37 @@ def test_module_already_in_sys(self):
140147
# Force the load; just care that no exception is raised.
141148
module.__name__
142149

150+
@threading_helper.requires_working_threading()
151+
def test_module_load_race(self):
152+
with test_util.uncache(TestingImporter.module_name):
153+
loader = TestingImporter()
154+
module = self.new_module(loader=loader)
155+
self.assertEqual(loader.load_count, 0)
156+
157+
class RaisingThread(threading.Thread):
158+
exc = None
159+
def run(self):
160+
try:
161+
super().run()
162+
except Exception as exc:
163+
self.exc = exc
164+
165+
def access_module():
166+
return module.attr
167+
168+
threads = []
169+
for _ in range(2):
170+
threads.append(thread := RaisingThread(target=access_module))
171+
thread.start()
172+
173+
# Races could cause errors
174+
for thread in threads:
175+
thread.join()
176+
self.assertIsNone(thread.exc)
177+
178+
# Or multiple load attempts
179+
self.assertEqual(loader.load_count, 1)
180+
143181

144182
if __name__ == '__main__':
145183
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protect modules loaded with :class:`importlib.util.LazyLoader` from race
2+
conditions when multiple threads try to access attributes before the loading
3+
is complete.

0 commit comments

Comments
 (0)