diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h index 0661f209e3..bb3319eec6 100644 --- a/include/pybind11/detail/common.h +++ b/include/pybind11/detail/common.h @@ -256,7 +256,7 @@ // Slightly faster code paths are available when PYBIND11_HAS_SUBINTERPRETER_SUPPORT is *not* // defined, so avoid defining it for implementations that do not support subinterpreters. However, -// defining it unnecessarily is not expected to break anything (other than old iOS targets). +// defining it unnecessarily is not expected to break anything. // This can be overridden by the user with -DPYBIND11_HAS_SUBINTERPRETER_SUPPORT=1 or 0 #ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT # if PY_VERSION_HEX >= 0x030C0000 && !defined(PYPY_VERSION) && !defined(GRAALVM_PYTHON) @@ -345,7 +345,11 @@ #define PYBIND11_STRINGIFY(x) #x #define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x) #define PYBIND11_CONCAT(first, second) first##second -#define PYBIND11_ENSURE_INTERNALS_READY pybind11::detail::get_internals(); +#define PYBIND11_ENSURE_INTERNALS_READY \ + { \ + pybind11::detail::get_internals_pp_manager().unref(); \ + pybind11::detail::get_internals(); \ + } #if !defined(GRAALVM_PYTHON) # define PYBIND11_PYCFUNCTION_GET_DOC(func) ((func)->m_ml->ml_doc) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 123460308d..972682e5eb 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -35,27 +35,17 @@ /// further ABI-incompatible changes may be made before the ABI is officially /// changed to the new version. #ifndef PYBIND11_INTERNALS_VERSION -# define PYBIND11_INTERNALS_VERSION 9 +# define PYBIND11_INTERNALS_VERSION 10 #endif -#if PYBIND11_INTERNALS_VERSION < 9 -# error "PYBIND11_INTERNALS_VERSION 9 is the minimum for all platforms for pybind11v3." +#if PYBIND11_INTERNALS_VERSION < 10 +# error "PYBIND11_INTERNALS_VERSION 10 is the minimum for all platforms for pybind11v3." #endif PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE) using ExceptionTranslator = void (*)(std::exception_ptr); -PYBIND11_NAMESPACE_BEGIN(detail) - -constexpr const char *internals_function_record_capsule_name = "pybind11_function_record_capsule"; - -// Forward declarations -inline PyTypeObject *make_static_property_type(); -inline PyTypeObject *make_default_metaclass(); -inline PyObject *make_object_base_type(PyTypeObject *metaclass); -inline void translate_exception(std::exception_ptr p); - // The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new // Thread Specific Storage (TSS) API. // Avoid unnecessary allocation of `Py_tss_t`, since we cannot use @@ -84,6 +74,63 @@ inline void translate_exception(std::exception_ptr p); #define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set(&(key), nullptr) #define PYBIND11_TLS_FREE(key) PyThread_tss_delete(&(key)) +/// A smart-pointer-like wrapper around a thread-specific value. get/set of the pointer applies to +/// the current thread only. +template +class thread_specific_storage { +public: + thread_specific_storage() { + // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) + if (!PYBIND11_TLS_KEY_CREATE(key_)) { + pybind11_fail( + "thread_specific_storage constructor: could not initialize the TSS key!"); + } + } + + ~thread_specific_storage() { + // This destructor could be called *after* Py_Finalize(). That *SHOULD BE* fine. The + // following details what happens when PyThread_tss_free is called. + // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does + // nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree. + // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). + // Neither of those have anything to do with CPython internals. PyMem_RawFree *requires* + // that the `key` be allocated with the CPython allocator (as it is by + // PyThread_tss_create). + PYBIND11_TLS_FREE(key_); + } + + thread_specific_storage(thread_specific_storage const &) = delete; + thread_specific_storage(thread_specific_storage &&) = delete; + thread_specific_storage &operator=(thread_specific_storage const &) = delete; + thread_specific_storage &operator=(thread_specific_storage &&) = delete; + + T *get() const { return reinterpret_cast(PYBIND11_TLS_GET_VALUE(key_)); } + + T &operator*() const { return *get(); } + explicit operator T *() const { return get(); } + explicit operator bool() const { return get() != nullptr; } + + void set(T *val) { PYBIND11_TLS_REPLACE_VALUE(key_, reinterpret_cast(val)); } + void reset(T *p = nullptr) { set(p); } + thread_specific_storage &operator=(T *pval) { + set(pval); + return *this; + } + +private: + PYBIND11_TLS_KEY_INIT(mutable key_) +}; + +PYBIND11_NAMESPACE_BEGIN(detail) + +constexpr const char *internals_function_record_capsule_name = "pybind11_function_record_capsule"; + +// Forward declarations +inline PyTypeObject *make_static_property_type(); +inline PyTypeObject *make_default_metaclass(); +inline PyObject *make_object_base_type(PyTypeObject *metaclass); +inline void translate_exception(std::exception_ptr p); + // Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly // other STLs, this means `typeid(A)` from one module won't equal `typeid(A)` from another module // even when `A` is the same, non-hidden-visibility type (e.g. from a common include). Under @@ -167,6 +214,8 @@ inline uint64_t round_up_to_next_pow2(uint64_t x) { } #endif +class loader_life_support; + /// Internal data structure used to track registered instances and types. /// Whenever binary incompatible changes are made to this structure, /// `PYBIND11_INTERNALS_VERSION` must be incremented. @@ -181,7 +230,7 @@ struct internals { std::unordered_map> registered_types_py; #ifdef Py_GIL_DISABLED std::unique_ptr instance_shards; // void * -> instance* - size_t instance_shards_mask; + size_t instance_shards_mask = 0; #else instance_map registered_instances; // void * -> instance* #endif @@ -198,32 +247,21 @@ struct internals { PyTypeObject *default_metaclass = nullptr; PyObject *instance_base = nullptr; // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: - PYBIND11_TLS_KEY_INIT(tstate) - PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key) + thread_specific_storage tstate; + thread_specific_storage loader_life_support_tls; // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined: PyInterpreterState *istate = nullptr; type_map native_enum_type_map; - internals() { + internals() + : static_property_type(make_static_property_type()), + default_metaclass(make_default_metaclass()) { PyThreadState *cur_tstate = PyThreadState_Get(); - // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) - if (!PYBIND11_TLS_KEY_CREATE(tstate)) { - pybind11_fail( - "internals constructor: could not successfully initialize the tstate TSS key!"); - } - PYBIND11_TLS_REPLACE_VALUE(tstate, cur_tstate); - - // NOLINTNEXTLINE(bugprone-assignment-in-if-condition) - if (!PYBIND11_TLS_KEY_CREATE(loader_life_support_tls_key)) { - pybind11_fail("internals constructor: could not successfully initialize the " - "loader_life_support TSS key!"); - } + tstate = cur_tstate; istate = cur_tstate->interp; registered_exception_translators.push_front(&translate_exception); - static_property_type = make_static_property_type(); - default_metaclass = make_default_metaclass(); #ifdef Py_GIL_DISABLED // Scale proportional to the number of cores. 2x is a heuristic to reduce contention. auto num_shards @@ -236,19 +274,10 @@ struct internals { #endif } internals(const internals &other) = delete; + internals(internals &&other) = delete; internals &operator=(const internals &other) = delete; - ~internals() { - PYBIND11_TLS_FREE(loader_life_support_tls_key); - - // This destructor is called *after* Py_Finalize() in finalize_interpreter(). - // That *SHOULD BE* fine. The following details what happens when PyThread_tss_free is - // called. PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does - // nothing. PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree. - // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). - // Neither of those have anything to do with CPython internals. PyMem_RawFree *requires* - // that the `tstate` be allocated with the CPython allocator. - PYBIND11_TLS_FREE(tstate); - } + internals &operator=(internals &&other) = delete; + ~internals() = default; }; // the internals struct (above) is shared between all the modules. local_internals are only @@ -322,33 +351,6 @@ inline std::atomic &get_num_interpreters_seen() { return counter; } -template -inline std::unique_ptr *&get_internals_pp() { -#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT - if (get_num_interpreters_seen() > 1) { - // Internals is one per interpreter. When multiple interpreters are alive in different - // threads we have to allow them to have different internals, so we need a thread_local. - static thread_local std::unique_ptr *t_internals_pp = nullptr; - static thread_local PyInterpreterState *istate_cached = nullptr; - // Whenever the interpreter changes on the current thread we need to invalidate the - // internals_pp so that it can be pulled from the interpreter's state dict. That is slow, - // so we use the current PyThreadState to check if it is necessary. The caller will see a - // null return and do the fetch from the state dict or create a new one (as needed). - auto *tstate = get_thread_state_unchecked(); - if (!tstate) { - istate_cached = nullptr; - t_internals_pp = nullptr; - } else if (tstate->interp != istate_cached) { - istate_cached = tstate->interp; - t_internals_pp = nullptr; - } - return t_internals_pp; - } -#endif - static std::unique_ptr *s_internals_pp = nullptr; - return s_internals_pp; -} - template >::value, int> = 0> bool handle_nested_exception(const T &exc, const std::exception_ptr &p) { @@ -475,62 +477,142 @@ inline object get_python_state_dict() { } template -inline std::unique_ptr * -get_internals_pp_from_capsule_in_state_dict(dict &state_dict, char const *state_dict_key) { - auto internals_obj - = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), state_dict_key)); - if (internals_obj) { - void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr); - if (!raw_ptr) { - raise_from(PyExc_SystemError, - "pybind11::detail::get_internals_pp_from_capsule_in_state_dict() FAILED"); - throw error_already_set(); +class internals_pp_manager { +public: + using on_fetch_function = void(InternalsType *); + internals_pp_manager(char const *id, on_fetch_function *on_fetch) + : holder_id_(id), on_fetch_(on_fetch) {} + + /// Get the current pointer-to-pointer, allocating it if it does not already exist. May + /// acquire the GIL. Will never return nullptr. + std::unique_ptr *get_pp() { +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT + if (get_num_interpreters_seen() > 1) { + // Whenever the interpreter changes on the current thread we need to invalidate the + // internals_pp so that it can be pulled from the interpreter's state dict. That is + // slow, so we use the current PyThreadState to check if it is necessary. + auto *tstate = get_thread_state_unchecked(); + if (!tstate || tstate->interp != last_istate_.get()) { + gil_scoped_acquire_simple gil; + if (!tstate) { + tstate = get_thread_state_unchecked(); + } + last_istate_ = tstate->interp; + internals_tls_p_ = get_or_create_pp_in_state_dict(); + } + return internals_tls_p_.get(); } - return reinterpret_cast *>(raw_ptr); +#endif + if (!internals_singleton_pp_) { + gil_scoped_acquire_simple gil; + internals_singleton_pp_ = get_or_create_pp_in_state_dict(); + } + return internals_singleton_pp_; } - return nullptr; -} -/// Return a reference to the current `internals` data -PYBIND11_NOINLINE internals &get_internals() { - auto *&internals_pp = get_internals_pp(); - if (internals_pp && *internals_pp) { - // This is the fast path, everything is already setup, just return it - return **internals_pp; + /// Drop all the references we're currently holding. + void unref() { +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT + last_istate_.reset(); + internals_tls_p_.reset(); +#endif + internals_singleton_pp_ = nullptr; } - // Slow path, something needs fetched from the state dict or created - - // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. - gil_scoped_acquire_simple gil; - error_scope err_scope; + void destroy() { +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT + if (get_num_interpreters_seen() > 1) { + auto *tstate = get_thread_state_unchecked(); + // this could be called without an active interpreter, just use what was cached + if (!tstate || tstate->interp == last_istate_.get()) { + auto tpp = internals_tls_p_.get(); + if (tpp) { + delete tpp; + } + } + unref(); + return; + } +#endif + delete internals_singleton_pp_; + unref(); + } - dict state_dict = get_python_state_dict(); - internals_pp = get_internals_pp_from_capsule_in_state_dict(state_dict, - PYBIND11_INTERNALS_ID); - if (!internals_pp) { - internals_pp = new std::unique_ptr; - state_dict[PYBIND11_INTERNALS_ID] = capsule(reinterpret_cast(internals_pp)); +private: + std::unique_ptr *get_or_create_pp_in_state_dict() { + error_scope err_scope; + dict state_dict = get_python_state_dict(); + auto internals_obj + = reinterpret_steal(dict_getitemstringref(state_dict.ptr(), holder_id_)); + std::unique_ptr *pp = nullptr; + if (internals_obj) { + void *raw_ptr = PyCapsule_GetPointer(internals_obj.ptr(), /*name=*/nullptr); + if (!raw_ptr) { + raise_from(PyExc_SystemError, + "pybind11::detail::internals_pp_manager::get_pp_from_dict() FAILED"); + throw error_already_set(); + } + pp = reinterpret_cast *>(raw_ptr); + if (on_fetch_ && pp) { + on_fetch_(pp->get()); + } + } else { + pp = new std::unique_ptr; + // NOLINTNEXTLINE(bugprone-casting-through-void) + state_dict[holder_id_] = capsule(reinterpret_cast(pp)); + } + return pp; } - if (*internals_pp) { - // We loaded the internals through `state_dict`, which means that our `error_already_set` - // and `builtin_exception` may be different local classes than the ones set up in the - // initial exception translator, below, so add another for our local exception classes. - // - // libstdc++ doesn't require this (types there are identified only by name) - // libc++ with CPython doesn't require this (types are explicitly exported) - // libc++ with PyPy still need it, awaiting further investigation + char const *holder_id_ = nullptr; + on_fetch_function *on_fetch_ = nullptr; +#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT + thread_specific_storage last_istate_; + thread_specific_storage> internals_tls_p_; +#endif + std::unique_ptr *internals_singleton_pp_; +}; + +// If We loaded the internals through `state_dict`, our `error_already_set` +// and `builtin_exception` may be different local classes than the ones set up in the +// initial exception translator, below, so add another for our local exception classes. +// +// libstdc++ doesn't require this (types there are identified only by name) +// libc++ with CPython doesn't require this (types are explicitly exported) +// libc++ with PyPy still need it, awaiting further investigation #if !defined(__GLIBCXX__) - if ((*internals_pp)->registered_exception_translators.empty() - || (*internals_pp)->registered_exception_translators.front() - != &translate_local_exception) { - (*internals_pp) - ->registered_exception_translators.push_front(&translate_local_exception); +inline void check_internals_local_exception_translator(internals *internals_ptr) { + if (internals_ptr) { + for (auto et : internals_ptr->registered_exception_translators) { + if (et == &translate_local_exception) { + return; + } } + internals_ptr->registered_exception_translators.push_front(&translate_local_exception); + } +} #endif - } else { - auto &internals_ptr = *internals_pp; + +inline internals_pp_manager &get_internals_pp_manager() { +#if defined(__GLIBCXX__) +# define ON_FETCH_FN nullptr +#else +# define ON_FETCH_FN &check_internals_local_exception_translator +#endif + static internals_pp_manager internals_pp_manager(PYBIND11_INTERNALS_ID, + ON_FETCH_FN); +#undef ON_FETCH_FN + return internals_pp_manager; +} + +/// Return a reference to the current `internals` data +PYBIND11_NOINLINE internals &get_internals() { + auto &ppmgr = get_internals_pp_manager(); + auto &internals_ptr = *ppmgr.get_pp(); + if (!internals_ptr) { + // Slow path, something needs fetched from the state dict or created + gil_scoped_acquire_simple gil; + error_scope err_scope; internals_ptr.reset(new internals()); if (!internals_ptr->instance_base) { @@ -539,44 +621,28 @@ PYBIND11_NOINLINE internals &get_internals() { internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass); } } - - return **internals_pp; + return *internals_ptr; } -/// A string key uniquely describing this module -inline char const *get_local_internals_id() { +inline internals_pp_manager &get_local_internals_pp_manager() { // Use the address of this static itself as part of the key, so that the value is uniquely tied // to where the module is loaded in memory static const std::string this_module_idstr = PYBIND11_MODULE_LOCAL_ID + std::to_string(reinterpret_cast(&this_module_idstr)); - return this_module_idstr.c_str(); + static internals_pp_manager local_internals_pp_manager( + this_module_idstr.c_str(), nullptr); + return local_internals_pp_manager; } /// Works like `get_internals`, but for things which are locally registered. inline local_internals &get_local_internals() { - auto *&local_internals_pp = get_internals_pp(); - if (local_internals_pp && *local_internals_pp) { - return **local_internals_pp; - } - - // Cannot use py::gil_scoped_acquire inside get_internals since that calls get_internals. - gil_scoped_acquire_simple gil; - error_scope err_scope; - - dict state_dict = get_python_state_dict(); - local_internals_pp = get_internals_pp_from_capsule_in_state_dict( - state_dict, get_local_internals_id()); - if (!local_internals_pp) { - local_internals_pp = new std::unique_ptr; - state_dict[get_local_internals_id()] - = capsule(reinterpret_cast(local_internals_pp)); + auto &ppmgr = get_local_internals_pp_manager(); + auto &internals_ptr = *ppmgr.get_pp(); + if (!internals_ptr) { + internals_ptr.reset(new local_internals()); } - if (!*local_internals_pp) { - local_internals_pp->reset(new local_internals()); - } - - return **local_internals_pp; + return *internals_ptr; } #ifdef Py_GIL_DISABLED diff --git a/include/pybind11/detail/type_caster_base.h b/include/pybind11/detail/type_caster_base.h index 9734d92181..aa8f2cf7e7 100644 --- a/include/pybind11/detail/type_caster_base.h +++ b/include/pybind11/detail/type_caster_base.h @@ -45,27 +45,21 @@ class loader_life_support { loader_life_support *parent = nullptr; std::unordered_set keep_alive; - // Store stack pointer in thread-local storage. - static PYBIND11_TLS_KEY_REF get_stack_tls_key() { - return get_internals().loader_life_support_tls_key; - } - static loader_life_support *get_stack_top() { - return static_cast(PYBIND11_TLS_GET_VALUE(get_stack_tls_key())); - } - static void set_stack_top(loader_life_support *value) { - PYBIND11_TLS_REPLACE_VALUE(get_stack_tls_key(), value); - } - public: /// A new patient frame is created when a function is entered - loader_life_support() : parent{get_stack_top()} { set_stack_top(this); } + loader_life_support() { + auto &stack_top = get_internals().loader_life_support_tls; + parent = stack_top.get(); + stack_top = this; + } /// ... and destroyed after it returns ~loader_life_support() { - if (get_stack_top() != this) { + auto &stack_top = get_internals().loader_life_support_tls; + if (stack_top.get() != this) { pybind11_fail("loader_life_support: internal error"); } - set_stack_top(parent); + stack_top = parent; for (auto *item : keep_alive) { Py_DECREF(item); } @@ -74,7 +68,7 @@ class loader_life_support { /// This can only be used inside a pybind11-bound function, either by `argument_loader` /// at argument preparation time or by `py::cast()` at execution time. PYBIND11_NOINLINE static void add_patient(handle h) { - loader_life_support *frame = get_stack_top(); + loader_life_support *frame = get_internals().loader_life_support_tls.get(); if (!frame) { // NOTE: It would be nice to include the stack frames here, as this indicates // use of pybind11::cast<> outside the normal call framework, finding such diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h index 812dd92e8a..e664bf298b 100644 --- a/include/pybind11/embed.h +++ b/include/pybind11/embed.h @@ -240,31 +240,29 @@ inline void initialize_interpreter(bool init_signal_handlers = true, \endrst */ inline void finalize_interpreter() { - // Get the internals pointer (without creating it if it doesn't exist). It's possible for the - // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` - // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). - auto *&internals_ptr_ptr = detail::get_internals_pp(); - auto *&local_internals_ptr_ptr = detail::get_internals_pp(); - { - dict state_dict = detail::get_python_state_dict(); - internals_ptr_ptr = detail::get_internals_pp_from_capsule_in_state_dict( - state_dict, PYBIND11_INTERNALS_ID); - local_internals_ptr_ptr - = detail::get_internals_pp_from_capsule_in_state_dict( - state_dict, detail::get_local_internals_id()); + // get rid of any thread-local interpreter cache that currently exists + if (detail::get_num_interpreters_seen() > 1) { + detail::get_internals_pp_manager().unref(); + detail::get_local_internals_pp_manager().unref(); + + // We know there can be no other interpreter alive now, so we can lower the count + detail::get_num_interpreters_seen() = 1; } + // Re-fetch the internals pointer-to-pointer (but not the internals itself, which might not + // exist). It's possible for the internals to be created during Py_Finalize() (e.g. if a + // py::capsule calls `get_internals()` during destruction), so we get the pointer-pointer here + // and check it after Py_Finalize(). + detail::get_internals_pp_manager().get_pp(); + detail::get_local_internals_pp_manager().get_pp(); + Py_Finalize(); - if (internals_ptr_ptr) { - internals_ptr_ptr->reset(); - } + detail::get_internals_pp_manager().destroy(); // Local internals contains data managed by the current interpreter, so we must clear them to // avoid undefined behaviors when initializing another interpreter - if (local_internals_ptr_ptr) { - local_internals_ptr_ptr->reset(); - } + detail::get_local_internals_pp_manager().destroy(); // We know there is no interpreter alive now, so we can reset the count detail::get_num_interpreters_seen() = 0; diff --git a/include/pybind11/gil.h b/include/pybind11/gil.h index e90ea41528..4222a035f4 100644 --- a/include/pybind11/gil.h +++ b/include/pybind11/gil.h @@ -68,7 +68,7 @@ class gil_scoped_acquire { public: PYBIND11_NOINLINE gil_scoped_acquire() { auto &internals = detail::get_internals(); - tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate); + tstate = internals.tstate.get(); if (!tstate) { /* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if @@ -87,7 +87,7 @@ class gil_scoped_acquire { } # endif tstate->gilstate_counter = 0; - PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate); + internals.tstate = tstate; } else { release = detail::get_thread_state_unchecked() != tstate; } @@ -124,7 +124,7 @@ class gil_scoped_acquire { if (active) { PyThreadState_DeleteCurrent(); } - PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate); + detail::get_internals().tstate.reset(); release = false; } } @@ -161,8 +161,7 @@ class gil_scoped_release { // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) tstate = PyEval_SaveThread(); if (disassoc) { - auto key = internals.tstate; // NOLINT(readability-qualified-auto) - PYBIND11_TLS_DELETE_VALUE(key); + internals.tstate.reset(); } } @@ -185,8 +184,7 @@ class gil_scoped_release { PyEval_RestoreThread(tstate); } if (disassoc) { - auto key = detail::get_internals().tstate; // NOLINT(readability-qualified-auto) - PYBIND11_TLS_REPLACE_VALUE(key, tstate); + detail::get_internals().tstate = tstate; } } diff --git a/include/pybind11/subinterpreter.h b/include/pybind11/subinterpreter.h index a1bef5a124..9f2f704c57 100644 --- a/include/pybind11/subinterpreter.h +++ b/include/pybind11/subinterpreter.h @@ -159,34 +159,19 @@ class subinterpreter { bool switch_back = old_tstate && old_tstate->interp != istate_; - // Get the internals pointer (without creating it if it doesn't exist). It's possible - // for the internals to be created during Py_EndInterpreter() (e.g. if a py::capsule - // calls `get_internals()` during destruction), so we get the pointer-pointer here and - // check it after. - auto *&internals_ptr_ptr = detail::get_internals_pp(); - auto *&local_internals_ptr_ptr = detail::get_internals_pp(); - { - dict sd = state_dict(); - internals_ptr_ptr - = detail::get_internals_pp_from_capsule_in_state_dict( - sd, PYBIND11_INTERNALS_ID); - local_internals_ptr_ptr - = detail::get_internals_pp_from_capsule_in_state_dict( - sd, detail::get_local_internals_id()); - } + // Internals always exists in the subinterpreter, this class enforces it when it creates + // the subinterpreter. Even if it didn't, this only creates the pointer-to-pointer, not the + // internals themselves. + detail::get_internals_pp_manager().get_pp(); + detail::get_local_internals_pp_manager().get_pp(); // End it Py_EndInterpreter(destroy_tstate); - // do NOT decrease detail::get_num_interpreters_seen, because it can never decrease - // while other threads are running... - - if (internals_ptr_ptr) { - internals_ptr_ptr->reset(); - } - if (local_internals_ptr_ptr) { - local_internals_ptr_ptr->reset(); - } + // It's possible for the internals to be created during endinterpreter (e.g. if a + // py::capsule calls `get_internals()` during destruction), so we destroy afterward. + detail::get_internals_pp_manager().destroy(); + detail::get_local_internals_pp_manager().destroy(); // switch back to the old tstate and old GIL (if there was one) if (switch_back) @@ -271,7 +256,7 @@ inline subinterpreter_scoped_activate::subinterpreter_scoped_activate(subinterpr old_tstate_ = PyThreadState_Swap(tstate_); // save this in internals for scoped_gil calls - PYBIND11_TLS_REPLACE_VALUE(detail::get_internals().tstate, tstate_); + detail::get_internals().tstate = tstate_; } inline subinterpreter_scoped_activate::~subinterpreter_scoped_activate() { @@ -307,7 +292,7 @@ inline subinterpreter_scoped_activate::~subinterpreter_scoped_activate() { pybind11_fail("~subinterpreter_scoped_activate: thread state must be current!"); } #endif - PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate); + detail::get_internals().tstate.reset(); PyThreadState_Clear(tstate_); PyThreadState_DeleteCurrent(); } diff --git a/tests/pyproject.toml b/tests/pyproject.toml index 3676c509ec..e58eb581ca 100644 --- a/tests/pyproject.toml +++ b/tests/pyproject.toml @@ -33,4 +33,3 @@ pyodide.test-groups = ["numpy", "scipy"] ios.test-groups = ["numpy"] ios.xbuild-tools = ["cmake", "ninja"] ios.environment.PIP_EXTRA_INDEX_URL = "https://pypi.anaconda.org/beeware/simple" -ios.config-settings."cmake.define.CMAKE_CXX_FLAGS" = "-DPYBIND11_HAS_SUBINTERPRETER_SUPPORT=0" diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp index 9515310891..bd50eaf1f2 100644 --- a/tests/test_embed/test_interpreter.cpp +++ b/tests/test_embed/test_interpreter.cpp @@ -25,14 +25,8 @@ bool has_state_dict_internals_obj() { return state.contains(PYBIND11_INTERNALS_ID); } -bool has_pybind11_internals_static() { - auto *&ipp = py::detail::get_internals_pp(); - return (ipp != nullptr) && *ipp; -} - uintptr_t get_details_as_uintptr() { - return reinterpret_cast( - py::detail::get_internals_pp()->get()); + return reinterpret_cast(py::detail::get_internals_pp_manager().get_pp()->get()); } class Widget { @@ -278,7 +272,6 @@ TEST_CASE("Restart the interpreter") { // Verify pre-restart state. REQUIRE(py::module_::import("widget_module").attr("add")(1, 2).cast() == 3); REQUIRE(has_state_dict_internals_obj()); - REQUIRE(has_pybind11_internals_static()); REQUIRE(py::module_::import("external_module").attr("A")(123).attr("value").cast() == 123); @@ -295,10 +288,10 @@ TEST_CASE("Restart the interpreter") { // Internals are deleted after a restart. REQUIRE_FALSE(has_state_dict_internals_obj()); - REQUIRE_FALSE(has_pybind11_internals_static()); + REQUIRE(get_details_as_uintptr() == 0); pybind11::detail::get_internals(); REQUIRE(has_state_dict_internals_obj()); - REQUIRE(has_pybind11_internals_static()); + REQUIRE(get_details_as_uintptr() != 0); REQUIRE(get_details_as_uintptr() == py::module_::import("external_module").attr("internals_at")().cast()); @@ -311,18 +304,15 @@ TEST_CASE("Restart the interpreter") { = py::capsule(&ran, [](void *ran) { py::detail::get_internals(); REQUIRE(has_state_dict_internals_obj()); - REQUIRE(has_pybind11_internals_static()); *static_cast(ran) = true; }); REQUIRE_FALSE(has_state_dict_internals_obj()); - REQUIRE_FALSE(has_pybind11_internals_static()); REQUIRE_FALSE(ran); py::finalize_interpreter(); REQUIRE(ran); - REQUIRE_FALSE(has_pybind11_internals_static()); py::initialize_interpreter(); REQUIRE_FALSE(has_state_dict_internals_obj()); - REQUIRE_FALSE(has_pybind11_internals_static()); + REQUIRE(get_details_as_uintptr() == 0); // C++ modules can be reloaded. auto cpp_module = py::module_::import("widget_module"); @@ -348,7 +338,6 @@ TEST_CASE("Threads") { // Restart interpreter to ensure threads are not initialized py::finalize_interpreter(); py::initialize_interpreter(); - REQUIRE_FALSE(has_pybind11_internals_static()); constexpr auto num_threads = 10; auto locals = py::dict("count"_a = 0); diff --git a/tests/test_embed/test_subinterpreter.cpp b/tests/test_embed/test_subinterpreter.cpp index 9614a76cd5..d314394389 100644 --- a/tests/test_embed/test_subinterpreter.cpp +++ b/tests/test_embed/test_subinterpreter.cpp @@ -17,20 +17,21 @@ namespace py = pybind11; using namespace py::literals; bool has_state_dict_internals_obj(); -bool has_pybind11_internals_static(); uintptr_t get_details_as_uintptr(); void unsafe_reset_internals_for_single_interpreter() { // unsafe normally, but for subsequent tests, put this back.. we know there are no threads // running and only 1 interpreter + py::detail::get_internals_pp_manager().unref(); + py::detail::get_local_internals_pp_manager().unref(); py::detail::get_num_interpreters_seen() = 1; - py::detail::get_internals_pp() = nullptr; py::detail::get_internals(); - py::detail::get_internals_pp() = nullptr; py::detail::get_local_internals(); } TEST_CASE("Single Subinterpreter") { + unsafe_reset_internals_for_single_interpreter(); + py::module_::import("external_module"); // in the main interpreter // Add tags to the modules in the main interpreter and test the basics. @@ -42,7 +43,6 @@ TEST_CASE("Single Subinterpreter") { REQUIRE(m.attr("add")(1, 2).cast() == 3); } REQUIRE(has_state_dict_internals_obj()); - REQUIRE(has_pybind11_internals_static()); auto main_int = py::module_::import("external_module").attr("internals_at")().cast(); @@ -52,14 +52,13 @@ TEST_CASE("Single Subinterpreter") { py::scoped_subinterpreter ssi; // The subinterpreter has internals populated - REQUIRE(has_pybind11_internals_static()); + REQUIRE(has_state_dict_internals_obj()); py::list(py::module_::import("sys").attr("path")).append(py::str(".")); auto ext_int = py::module_::import("external_module").attr("internals_at")().cast(); py::detail::get_internals(); - REQUIRE(has_pybind11_internals_static()); REQUIRE(get_details_as_uintptr() == ext_int); REQUIRE(ext_int != main_int); @@ -205,6 +204,8 @@ TEST_CASE("GIL Subinterpreter") { } TEST_CASE("Multiple Subinterpreters") { + unsafe_reset_internals_for_single_interpreter(); + // Make sure the module is in the main interpreter and save its pointer auto *main_ext = py::module_::import("external_module").ptr(); auto main_int