Skip to content

Commit 7e11f11

Browse files
committed
Add fast_type_map, use it authoritatively for local types and as a hint for global types
nanobind has a similar two-level lookup strategy, added and explained by wjakob/nanobind@b515b1f In this PR I've ported this approach to pybind11. To avoid an ABI break, I've kept the fast maps to the `local_internals`. I think this should be safe because any particular module should see its `local_internals` reset at least as often as the global `internals`, and misses in the fast "hint" map for global types fall back to the global `internals`. Performance seems to have improved. Using my patched fork of pybind11_benchmark (https://github.com/swolchok/pybind11_benchmark/tree/benchmark-updates, specifically commit hash b6613d12607104d547b1c10a8145d1b3e9937266), I run bench.py and observe the MyInt case. Each time, I do 3 runs and just report all 3. master, Mac: 75.9, 76.9, 75.3 nsec/loop this PR, Mac: 73.8, 73.8, 73.6 nsec/loop master, Linux box: 188, 187, 188 nsec/loop this PR, Linux box: 164, 165, 164 nsec/loop Note that the "real" percentage improvement is larger than implied by the above because master does not yet include pybind#5824.
1 parent 326b106 commit 7e11f11

File tree

5 files changed

+83
-16
lines changed

5 files changed

+83
-16
lines changed

include/pybind11/detail/class.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,12 @@ extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
221221
auto tindex = std::type_index(*tinfo->cpptype);
222222
internals.direct_conversions.erase(tindex);
223223

224+
auto &local_internals = get_local_internals();
224225
if (tinfo->module_local) {
225-
get_local_internals().registered_types_cpp.erase(tindex);
226+
local_internals.registered_types_cpp.erase(tinfo->cpptype);
226227
} else {
227228
internals.registered_types_cpp.erase(tindex);
229+
local_internals.global_registered_types_cpp_fast.erase(tinfo->cpptype);
228230
}
229231
internals.registered_types_py.erase(tinfo->type);
230232

include/pybind11/detail/internals.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,12 @@ struct type_equal_to {
177177
};
178178
#endif
179179

180+
template <typename value_type>
181+
// REVIEW: do we need to add a fancy hash for pointers or is the
182+
// possible identity hash function from the standard library (e.g.,
183+
// libstdc++) sufficient?
184+
using fast_type_map = std::unordered_map<const std::type_info *, value_type>;
185+
180186
template <typename value_type>
181187
using type_map = std::unordered_map<std::type_index, value_type, type_hash, type_equal_to>;
182188

@@ -302,7 +308,15 @@ struct internals {
302308
// impact any other modules, because the only things accessing the local internals is the
303309
// module that contains them.
304310
struct local_internals {
305-
type_map<type_info *> registered_types_cpp;
311+
// It should be safe to use fast_type_map here because this entire
312+
// data structure is scoped to our single module, and thus a single
313+
// DSO and single instance of type_info for any particular type.
314+
fast_type_map<type_info *> registered_types_cpp;
315+
316+
// fast hint for the *global* internals registered_types_cpp
317+
// map. If we lookup successfully, that's the right answer;
318+
// otherwise we go to the global map and then backfill this one.
319+
fast_type_map<type_info *> global_registered_types_cpp_fast;
306320
std::forward_list<ExceptionTranslator> registered_exception_translators;
307321
PyTypeObject *function_record_py_type = nullptr;
308322
};

include/pybind11/detail/type_caster_base.h

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -205,35 +205,57 @@ PYBIND11_NOINLINE detail::type_info *get_type_info(PyTypeObject *type) {
205205
return bases.front();
206206
}
207207

208-
inline detail::type_info *get_local_type_info(const std::type_index &tp) {
209-
auto &locals = get_local_internals().registered_types_cpp;
210-
auto it = locals.find(tp);
208+
inline detail::type_info *get_local_type_info(const std::type_info &tp,
209+
const local_internals &local_internals) {
210+
auto &locals = local_internals.registered_types_cpp;
211+
auto it = locals.find(&tp);
211212
if (it != locals.end()) {
212213
return it->second;
213214
}
214215
return nullptr;
215216
}
216217

217-
inline detail::type_info *get_global_type_info(const std::type_index &tp) {
218+
inline detail::type_info *get_local_type_info(const std::type_info &tp) {
219+
return get_local_type_info(tp, get_local_internals());
220+
}
221+
222+
inline detail::type_info *get_global_type_info(const std::type_info &tp,
223+
local_internals &local_internals) {
218224
return with_internals([&](internals &internals) {
219225
detail::type_info *type_info = nullptr;
226+
auto &fast_types = local_internals.global_registered_types_cpp_fast;
220227
auto &types = internals.registered_types_cpp;
221-
auto it = types.find(tp);
228+
auto fast_it = fast_types.find(&tp);
229+
if (fast_it != fast_types.end()) {
230+
#ifndef NDEBUG
231+
auto types_it = types.find(std::type_index(tp));
232+
assert(types_it != types.end());
233+
assert(types_it->second == fast_it->second);
234+
#endif
235+
return fast_it->second;
236+
}
237+
auto it = types.find(std::type_index(tp));
222238
if (it != types.end()) {
239+
fast_types.emplace(&tp, it->second);
223240
type_info = it->second;
224241
}
225242
return type_info;
226243
});
227244
}
228245

246+
inline detail::type_info *get_global_type_info(const std::type_info &tp) {
247+
return get_global_type_info(tp, get_local_internals());
248+
}
249+
229250
/// Return the type info for a given C++ type; on lookup failure can either throw or return
230251
/// nullptr.
231-
PYBIND11_NOINLINE detail::type_info *get_type_info(const std::type_index &tp,
252+
PYBIND11_NOINLINE detail::type_info *get_type_info(const std::type_info &tp,
232253
bool throw_if_missing = false) {
233-
if (auto *ltype = get_local_type_info(tp)) {
254+
auto &local_internals = get_local_internals();
255+
if (auto *ltype = get_local_type_info(tp, local_internals)) {
234256
return ltype;
235257
}
236-
if (auto *gtype = get_global_type_info(tp)) {
258+
if (auto *gtype = get_global_type_info(tp, local_internals)) {
237259
return gtype;
238260
}
239261

include/pybind11/pybind11.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,10 +1636,12 @@ class generic_type : public object {
16361636
with_internals([&](internals &internals) {
16371637
auto tindex = std::type_index(*rec.type);
16381638
tinfo->direct_conversions = &internals.direct_conversions[tindex];
1639+
auto &local_internals = get_local_internals();
16391640
if (rec.module_local) {
1640-
get_local_internals().registered_types_cpp[tindex] = tinfo;
1641+
local_internals.registered_types_cpp[rec.type] = tinfo;
16411642
} else {
16421643
internals.registered_types_cpp[tindex] = tinfo;
1644+
local_internals.global_registered_types_cpp_fast[rec.type] = tinfo;
16431645
}
16441646

16451647
PYBIND11_WARNING_PUSH
@@ -2137,10 +2139,16 @@ class class_ : public detail::generic_type {
21372139

21382140
if (has_alias) {
21392141
with_internals([&](internals &internals) {
2140-
auto &instances = record.module_local ? get_local_internals().registered_types_cpp
2141-
: internals.registered_types_cpp;
2142-
instances[std::type_index(typeid(type_alias))]
2143-
= instances[std::type_index(typeid(type))];
2142+
auto &local_internals = get_local_internals();
2143+
if (record.module_local) {
2144+
local_internals.registered_types_cpp[&typeid(type_alias)]
2145+
= local_internals.registered_types_cpp[&typeid(type)];
2146+
} else {
2147+
type_info *const val
2148+
= internals.registered_types_cpp[std::type_index(typeid(type))];
2149+
internals.registered_types_cpp[std::type_index(typeid(type_alias))] = val;
2150+
local_internals.global_registered_types_cpp_fast[&typeid(type_alias)] = val;
2151+
}
21442152
});
21452153
}
21462154
def("_pybind11_conduit_v1_", cpp_conduit_method);

tests/test_embed/external_module.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,32 @@ namespace py = pybind11;
66
* modules aren't preserved over a finalize/initialize.
77
*/
88

9+
namespace {
10+
// Compare unsafe_reset_internals_for_single_interpreter in
11+
// test_subinterpreter.cpp.
12+
void unsafe_reset_local_internals() {
13+
// NOTE: This code is NOT SAFE unless the caller guarantees no other threads are alive
14+
// NOTE: This code is tied to the precise implementation of the internals holder
15+
16+
// first, unref the thread local internals
17+
py::detail::get_local_internals_pp_manager().unref();
18+
19+
// now we unref the static global singleton internals
20+
py::detail::get_local_internals_pp_manager().unref();
21+
22+
// finally, we reload the static global singleton
23+
py::detail::get_local_internals();
24+
}
25+
} // namespace
26+
927
PYBIND11_MODULE(external_module,
1028
m,
1129
py::mod_gil_not_used(),
1230
py::multiple_interpreters::per_interpreter_gil()) {
13-
31+
// At least one test ("Single Subinterpreter") wants to reset
32+
// internals. We have separate local internals because we are a
33+
// separate DSO, so ours need to be reset too!
34+
unsafe_reset_local_internals();
1435
class A {
1536
public:
1637
explicit A(int value) : v{value} {};

0 commit comments

Comments
 (0)