Skip to content

fix: modify the internals pointer-to-pointer implementation to not use thread_local #5709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jun 3, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions include/pybind11/detail/common.h
Original file line number Diff line number Diff line change
@@ -256,7 +256,7 @@

// Slightly faster code paths are available when PYBIND11_HAS_SUBINTERPRETER_SUPPORT is *not*
// defined, so avoid defining it for implementations that do not support subinterpreters. However,
// defining it unnecessarily is not expected to break anything (other than old iOS targets).
// defining it unnecessarily is not expected to break anything.
// This can be overridden by the user with -DPYBIND11_HAS_SUBINTERPRETER_SUPPORT=1 or 0
#ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
# if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON)
@@ -345,7 +345,11 @@
#define PYBIND11_STRINGIFY(x) #x
#define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x)
#define PYBIND11_CONCAT(first, second) first##second
#define PYBIND11_ENSURE_INTERNALS_READY pybind11::detail::get_internals();
#define PYBIND11_ENSURE_INTERNALS_READY \
{ \
pybind11::detail::get_internals_pp_manager().unref(); \
pybind11::detail::get_internals(); \
}

#if !defined(GRAALVM_PYTHON)
# define PYBIND11_PYCFUNCTION_GET_DOC(func) ((func)->m_ml->ml_doc)
348 changes: 207 additions & 141 deletions include/pybind11/detail/internals.h

Large diffs are not rendered by default.

24 changes: 9 additions & 15 deletions include/pybind11/detail/type_caster_base.h
Original file line number Diff line number Diff line change
@@ -45,27 +45,21 @@ class loader_life_support {
loader_life_support *parent = nullptr;
std::unordered_set<PyObject *> keep_alive;

// Store stack pointer in thread-local storage.
static PYBIND11_TLS_KEY_REF get_stack_tls_key() {
return get_internals().loader_life_support_tls_key;
}
static loader_life_support *get_stack_top() {
return static_cast<loader_life_support *>(PYBIND11_TLS_GET_VALUE(get_stack_tls_key()));
}
static void set_stack_top(loader_life_support *value) {
PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value);
}

public:
/// A new patient frame is created when a function is entered
loader_life_support() : parent{get_stack_top()} { set_stack_top(this); }
loader_life_support() {
auto &stack_top = get_internals().loader_life_support_tls;
parent = stack_top.get();
stack_top = this;
}

/// ... and destroyed after it returns
~loader_life_support() {
if (get_stack_top() != this) {
auto &stack_top = get_internals().loader_life_support_tls;
if (stack_top.get() != this) {
pybind11_fail("loader_life_support: internal error");
}
set_stack_top(parent);
stack_top = parent;
for (auto *item : keep_alive) {
Py_DECREF(item);
}
@@ -74,7 +68,7 @@ class loader_life_support {
/// This can only be used inside a pybind11-bound function, either by `argument_loader`
/// at argument preparation time or by `py::cast()` at execution time.
PYBIND11_NOINLINE static void add_patient(handle h) {
loader_life_support *frame = get_stack_top();
loader_life_support *frame = get_internals().loader_life_support_tls.get();
if (!frame) {
// NOTE: It would be nice to include the stack frames here, as this indicates
// use of pybind11::cast<> outside the normal call framework, finding such
34 changes: 16 additions & 18 deletions include/pybind11/embed.h
Original file line number Diff line number Diff line change
@@ -240,31 +240,29 @@ inline void initialize_interpreter(bool init_signal_handlers = true,

\endrst */
inline void finalize_interpreter() {
// Get the internals pointer (without creating it if it doesn't exist). It's possible for the
// internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()`
// during destruction), so we get the pointer-pointer here and check it after Py_Finalize().
auto *&internals_ptr_ptr = detail::get_internals_pp<detail::internals>();
auto *&local_internals_ptr_ptr = detail::get_internals_pp<detail::local_internals>();
{
dict state_dict = detail::get_python_state_dict();
internals_ptr_ptr = detail::get_internals_pp_from_capsule_in_state_dict<detail::internals>(
state_dict, PYBIND11_INTERNALS_ID);
local_internals_ptr_ptr
= detail::get_internals_pp_from_capsule_in_state_dict<detail::local_internals>(
state_dict, detail::get_local_internals_id());
// get rid of any thread-local interpreter cache that currently exists
if (detail::get_num_interpreters_seen() > 1) {
detail::get_internals_pp_manager().unref();
detail::get_local_internals_pp_manager().unref();

// We know there can be no other interpreter alive now, so we can lower the count
detail::get_num_interpreters_seen() = 1;
}

// Re-fetch the internals pointer-to-pointer (but not the internals itself, which might not
// exist). It's possible for the internals to be created during Py_Finalize() (e.g. if a
// py::capsule calls `get_internals()` during destruction), so we get the pointer-pointer here
// and check it after Py_Finalize().
detail::get_internals_pp_manager().get_pp();
detail::get_local_internals_pp_manager().get_pp();

Py_Finalize();

if (internals_ptr_ptr) {
internals_ptr_ptr->reset();
}
detail::get_internals_pp_manager().destroy();

// Local internals contains data managed by the current interpreter, so we must clear them to
// avoid undefined behaviors when initializing another interpreter
if (local_internals_ptr_ptr) {
local_internals_ptr_ptr->reset();
}
detail::get_local_internals_pp_manager().destroy();

// We know there is no interpreter alive now, so we can reset the count
detail::get_num_interpreters_seen() = 0;
12 changes: 5 additions & 7 deletions include/pybind11/gil.h
Original file line number Diff line number Diff line change
@@ -68,7 +68,7 @@ class gil_scoped_acquire {
public:
PYBIND11_NOINLINE gil_scoped_acquire() {
auto &internals = detail::get_internals();
tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate);
tstate = internals.tstate.get();

if (!tstate) {
/* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if
@@ -87,7 +87,7 @@ class gil_scoped_acquire {
}
# endif
tstate->gilstate_counter = 0;
PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate);
internals.tstate = tstate;
} else {
release = detail::get_thread_state_unchecked() != tstate;
}
@@ -124,7 +124,7 @@ class gil_scoped_acquire {
if (active) {
PyThreadState_DeleteCurrent();
}
PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
detail::get_internals().tstate.reset();
release = false;
}
}
@@ -161,8 +161,7 @@ class gil_scoped_release {
// NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
tstate = PyEval_SaveThread();
if (disassoc) {
auto key = internals.tstate; // NOLINT(readability-qualified-auto)
PYBIND11_TLS_DELETE_VALUE(key);
internals.tstate.reset();
}
}

@@ -185,8 +184,7 @@ class gil_scoped_release {
PyEval_RestoreThread(tstate);
}
if (disassoc) {
auto key = detail::get_internals().tstate; // NOLINT(readability-qualified-auto)
PYBIND11_TLS_REPLACE_VALUE(key, tstate);
detail::get_internals().tstate = tstate;
}
}

37 changes: 11 additions & 26 deletions include/pybind11/subinterpreter.h
Original file line number Diff line number Diff line change
@@ -159,34 +159,19 @@ class subinterpreter {

bool switch_back = old_tstate && old_tstate->interp != istate_;

// Get the internals pointer (without creating it if it doesn't exist). It's possible
// for the internals to be created during Py_EndInterpreter() (e.g. if a py::capsule
// calls `get_internals()` during destruction), so we get the pointer-pointer here and
// check it after.
auto *&internals_ptr_ptr = detail::get_internals_pp<detail::internals>();
auto *&local_internals_ptr_ptr = detail::get_internals_pp<detail::local_internals>();
{
dict sd = state_dict();
internals_ptr_ptr
= detail::get_internals_pp_from_capsule_in_state_dict<detail::internals>(
sd, PYBIND11_INTERNALS_ID);
local_internals_ptr_ptr
= detail::get_internals_pp_from_capsule_in_state_dict<detail::local_internals>(
sd, detail::get_local_internals_id());
}
// Internals always exists in the subinterpreter, this class enforces it when it creates
// the subinterpreter. Even if it didn't, this only creates the pointer-to-pointer, not the
// internals themselves.
detail::get_internals_pp_manager().get_pp();
detail::get_local_internals_pp_manager().get_pp();

// End it
Py_EndInterpreter(destroy_tstate);

// do NOT decrease detail::get_num_interpreters_seen, because it can never decrease
// while other threads are running...

if (internals_ptr_ptr) {
internals_ptr_ptr->reset();
}
if (local_internals_ptr_ptr) {
local_internals_ptr_ptr->reset();
}
// It's possible for the internals to be created during endinterpreter (e.g. if a
// py::capsule calls `get_internals()` during destruction), so we destroy afterward.
detail::get_internals_pp_manager().destroy();
detail::get_local_internals_pp_manager().destroy();

// switch back to the old tstate and old GIL (if there was one)
if (switch_back)
@@ -271,7 +256,7 @@ inline subinterpreter_scoped_activate::subinterpreter_scoped_activate(subinterpr
old_tstate_ = PyThreadState_Swap(tstate_);

// save this in internals for scoped_gil calls
PYBIND11_TLS_REPLACE_VALUE(detail::get_internals().tstate, tstate_);
detail::get_internals().tstate = tstate_;
}

inline subinterpreter_scoped_activate::~subinterpreter_scoped_activate() {
@@ -307,7 +292,7 @@ inline subinterpreter_scoped_activate::~subinterpreter_scoped_activate() {
pybind11_fail("~subinterpreter_scoped_activate: thread state must be current!");
}
#endif
PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
detail::get_internals().tstate.reset();
PyThreadState_Clear(tstate_);
PyThreadState_DeleteCurrent();
}
1 change: 0 additions & 1 deletion tests/pyproject.toml
Original file line number Diff line number Diff line change
@@ -33,4 +33,3 @@ pyodide.test-groups = ["numpy", "scipy"]
ios.test-groups = ["numpy"]
ios.xbuild-tools = ["cmake", "ninja"]
ios.environment.PIP_EXTRA_INDEX_URL = "https://pypi.anaconda.org/beeware/simple"
ios.config-settings."cmake.define.CMAKE_CXX_FLAGS" = "-DPYBIND11_HAS_SUBINTERPRETER_SUPPORT=0"
19 changes: 4 additions & 15 deletions tests/test_embed/test_interpreter.cpp
Original file line number Diff line number Diff line change
@@ -25,14 +25,8 @@ bool has_state_dict_internals_obj() {
return state.contains(PYBIND11_INTERNALS_ID);
}

bool has_pybind11_internals_static() {
auto *&ipp = py::detail::get_internals_pp<py::detail::internals>();
return (ipp != nullptr) && *ipp;
}

uintptr_t get_details_as_uintptr() {
return reinterpret_cast<uintptr_t>(
py::detail::get_internals_pp<py::detail::internals>()->get());
return reinterpret_cast<uintptr_t>(py::detail::get_internals_pp_manager().get_pp()->get());
}

class Widget {
@@ -278,7 +272,6 @@ TEST_CASE("Restart the interpreter") {
// Verify pre-restart state.
REQUIRE(py::module_::import("widget_module").attr("add")(1, 2).cast<int>() == 3);
REQUIRE(has_state_dict_internals_obj());
REQUIRE(has_pybind11_internals_static());
REQUIRE(py::module_::import("external_module").attr("A")(123).attr("value").cast<int>()
== 123);

@@ -295,10 +288,10 @@ TEST_CASE("Restart the interpreter") {

// Internals are deleted after a restart.
REQUIRE_FALSE(has_state_dict_internals_obj());
REQUIRE_FALSE(has_pybind11_internals_static());
REQUIRE(get_details_as_uintptr() == 0);
pybind11::detail::get_internals();
REQUIRE(has_state_dict_internals_obj());
REQUIRE(has_pybind11_internals_static());
REQUIRE(get_details_as_uintptr() != 0);
REQUIRE(get_details_as_uintptr()
== py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>());

@@ -311,18 +304,15 @@ TEST_CASE("Restart the interpreter") {
= py::capsule(&ran, [](void *ran) {
py::detail::get_internals();
REQUIRE(has_state_dict_internals_obj());
REQUIRE(has_pybind11_internals_static());
*static_cast<bool *>(ran) = true;
});
REQUIRE_FALSE(has_state_dict_internals_obj());
REQUIRE_FALSE(has_pybind11_internals_static());
REQUIRE_FALSE(ran);
py::finalize_interpreter();
REQUIRE(ran);
REQUIRE_FALSE(has_pybind11_internals_static());
py::initialize_interpreter();
REQUIRE_FALSE(has_state_dict_internals_obj());
REQUIRE_FALSE(has_pybind11_internals_static());
REQUIRE(get_details_as_uintptr() == 0);

// C++ modules can be reloaded.
auto cpp_module = py::module_::import("widget_module");
@@ -348,7 +338,6 @@ TEST_CASE("Threads") {
// Restart interpreter to ensure threads are not initialized
py::finalize_interpreter();
py::initialize_interpreter();
REQUIRE_FALSE(has_pybind11_internals_static());

constexpr auto num_threads = 10;
auto locals = py::dict("count"_a = 0);
13 changes: 7 additions & 6 deletions tests/test_embed/test_subinterpreter.cpp
Original file line number Diff line number Diff line change
@@ -17,20 +17,21 @@ namespace py = pybind11;
using namespace py::literals;

bool has_state_dict_internals_obj();
bool has_pybind11_internals_static();
uintptr_t get_details_as_uintptr();

void unsafe_reset_internals_for_single_interpreter() {
// unsafe normally, but for subsequent tests, put this back.. we know there are no threads
// running and only 1 interpreter
py::detail::get_internals_pp_manager().unref();
py::detail::get_local_internals_pp_manager().unref();
py::detail::get_num_interpreters_seen() = 1;
py::detail::get_internals_pp<py::detail::internals>() = nullptr;
py::detail::get_internals();
py::detail::get_internals_pp<py::detail::local_internals>() = nullptr;
py::detail::get_local_internals();
}

TEST_CASE("Single Subinterpreter") {
unsafe_reset_internals_for_single_interpreter();

py::module_::import("external_module"); // in the main interpreter

// Add tags to the modules in the main interpreter and test the basics.
@@ -42,7 +43,6 @@ TEST_CASE("Single Subinterpreter") {
REQUIRE(m.attr("add")(1, 2).cast<int>() == 3);
}
REQUIRE(has_state_dict_internals_obj());
REQUIRE(has_pybind11_internals_static());

auto main_int
= py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>();
@@ -52,14 +52,13 @@ TEST_CASE("Single Subinterpreter") {
py::scoped_subinterpreter ssi;

// The subinterpreter has internals populated
REQUIRE(has_pybind11_internals_static());
REQUIRE(has_state_dict_internals_obj());

py::list(py::module_::import("sys").attr("path")).append(py::str("."));

auto ext_int
= py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>();
py::detail::get_internals();
REQUIRE(has_pybind11_internals_static());
REQUIRE(get_details_as_uintptr() == ext_int);
REQUIRE(ext_int != main_int);

@@ -205,6 +204,8 @@ TEST_CASE("GIL Subinterpreter") {
}

TEST_CASE("Multiple Subinterpreters") {
unsafe_reset_internals_for_single_interpreter();

// Make sure the module is in the main interpreter and save its pointer
auto *main_ext = py::module_::import("external_module").ptr();
auto main_int