Skip to content

Commit a3af3cb

Browse files
authored
gh-110481: Implement inter-thread queue for biased reference counting (#114824)
Biased reference counting maintains two refcount fields in each object: `ob_ref_local` and `ob_ref_shared`. The true refcount is the sum of these two fields. In some cases, when refcounting operations are split across threads, the ob_ref_shared field can be negative (although the total refcount must be at least zero). In this case, the thread that decremented the refcount requests that the owning thread give up ownership and merge the refcount fields.
1 parent a225520 commit a3af3cb

21 files changed

+418
-11
lines changed

Include/internal/pycore_brc.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#ifndef Py_INTERNAL_BRC_H
2+
#define Py_INTERNAL_BRC_H
3+
4+
#include <stdint.h>
5+
#include "pycore_llist.h" // struct llist_node
6+
#include "pycore_lock.h" // PyMutex
7+
#include "pycore_object_stack.h" // _PyObjectStack
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
#ifndef Py_BUILD_CORE
14+
# error "this header requires Py_BUILD_CORE define"
15+
#endif
16+
17+
#ifdef Py_GIL_DISABLED
18+
19+
// Prime number to avoid correlations with memory addresses.
20+
#define _Py_BRC_NUM_BUCKETS 257
21+
22+
// Hash table bucket
23+
struct _brc_bucket {
24+
// Mutex protects both the bucket and thread state queues in this bucket.
25+
PyMutex mutex;
26+
27+
// Linked list of _PyThreadStateImpl objects hashed to this bucket.
28+
struct llist_node root;
29+
};
30+
31+
// Per-interpreter biased reference counting state
32+
struct _brc_state {
33+
// Hash table of thread states by thread-id. Thread states within a bucket
34+
// are chained using a doubly-linked list.
35+
struct _brc_bucket table[_Py_BRC_NUM_BUCKETS];
36+
};
37+
38+
// Per-thread biased reference counting state
39+
struct _brc_thread_state {
40+
// Linked-list of thread states per hash bucket
41+
struct llist_node bucket_node;
42+
43+
// Thread-id as determined by _PyThread_Id()
44+
uintptr_t tid;
45+
46+
// Objects with refcounts to be merged (protected by bucket mutex)
47+
_PyObjectStack objects_to_merge;
48+
49+
// Local stack of objects to be merged (not accessed by other threads)
50+
_PyObjectStack local_objects_to_merge;
51+
};
52+
53+
// Initialize/finalize the per-thread biased reference counting state
54+
void _Py_brc_init_thread(PyThreadState *tstate);
55+
void _Py_brc_remove_thread(PyThreadState *tstate);
56+
57+
// Initialize per-interpreter state
58+
void _Py_brc_init_state(PyInterpreterState *interp);
59+
60+
void _Py_brc_after_fork(PyInterpreterState *interp);
61+
62+
// Enqueues an object to be merged by it's owning thread (tid). This
63+
// steals a reference to the object.
64+
void _Py_brc_queue_object(PyObject *ob);
65+
66+
// Merge the refcounts of queued objects for the current thread.
67+
void _Py_brc_merge_refcounts(PyThreadState *tstate);
68+
69+
#endif /* Py_GIL_DISABLED */
70+
71+
#ifdef __cplusplus
72+
}
73+
#endif
74+
#endif /* !Py_INTERNAL_BRC_H */

Include/internal/pycore_ceval.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame)
206206
#define _PY_ASYNC_EXCEPTION_BIT 3
207207
#define _PY_GC_SCHEDULED_BIT 4
208208
#define _PY_EVAL_PLEASE_STOP_BIT 5
209+
#define _PY_EVAL_EXPLICIT_MERGE_BIT 6
209210

210211
/* Reserve a few bits for future use */
211212
#define _PY_EVAL_EVENTS_BITS 8

Include/internal/pycore_interp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ struct _is {
201201

202202
#if defined(Py_GIL_DISABLED)
203203
struct _mimalloc_interp_state mimalloc;
204+
struct _brc_state brc; // biased reference counting state
204205
#endif
205206

206207
// Per-interpreter state for the obmalloc allocator. For the main

Include/internal/pycore_object_stack.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef Py_INTERNAL_OBJECT_STACK_H
22
#define Py_INTERNAL_OBJECT_STACK_H
33

4+
#include "pycore_freelist.h" // _PyFreeListState
5+
46
#ifdef __cplusplus
57
extern "C" {
68
#endif
@@ -74,6 +76,10 @@ _PyObjectStack_Pop(_PyObjectStack *stack)
7476
return obj;
7577
}
7678

79+
// Merge src into dst, leaving src empty
80+
extern void
81+
_PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src);
82+
7783
// Remove all items from the stack
7884
extern void
7985
_PyObjectStack_Clear(_PyObjectStack *stack);

Include/internal/pycore_tstate.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extern "C" {
1010

1111
#include "pycore_freelist.h" // struct _Py_freelist_state
1212
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
13+
#include "pycore_brc.h" // struct _brc_thread_state
1314

1415

1516
// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
@@ -22,6 +23,7 @@ typedef struct _PyThreadStateImpl {
2223
#ifdef Py_GIL_DISABLED
2324
struct _mimalloc_thread_state mimalloc;
2425
struct _Py_freelist_state freelist_state;
26+
struct _brc_thread_state brc;
2527
#endif
2628

2729
} _PyThreadStateImpl;

Lib/test/test_code.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,7 @@ def __init__(self, f, test):
865865
self.test = test
866866
def run(self):
867867
del self.f
868+
gc_collect()
868869
self.test.assertEqual(LAST_FREED, 500)
869870

870871
SetExtra(f.__code__, FREE_INDEX, ctypes.c_voidp(500))

Lib/test/test_concurrent_futures/executor.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import threading
22
import time
3+
import unittest
34
import weakref
45
from concurrent import futures
56
from test import support
7+
from test.support import Py_GIL_DISABLED
68

79

810
def mul(x, y):
@@ -83,10 +85,21 @@ def test_no_stale_references(self):
8385
my_object_collected = threading.Event()
8486
my_object_callback = weakref.ref(
8587
my_object, lambda obj: my_object_collected.set())
86-
# Deliberately discarding the future.
87-
self.executor.submit(my_object.my_method)
88+
fut = self.executor.submit(my_object.my_method)
8889
del my_object
8990

91+
if Py_GIL_DISABLED:
92+
# Due to biased reference counting, my_object might only be
93+
# deallocated while the thread that created it runs -- if the
94+
# thread is paused waiting on an event, it may not merge the
95+
# refcount of the queued object. For that reason, we wait for the
96+
# task to finish (so that it's no longer referenced) and force a
97+
# GC to ensure that it is collected.
98+
fut.result() # Wait for the task to finish.
99+
support.gc_collect()
100+
else:
101+
del fut # Deliberately discard the future.
102+
90103
collected = my_object_collected.wait(timeout=support.SHORT_TIMEOUT)
91104
self.assertTrue(collected,
92105
"Stale reference not collected within timeout.")

Lib/test/test_concurrent_futures/test_process_pool.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def test_ressources_gced_in_workers(self):
9898

9999
# explicitly destroy the object to ensure that EventfulGCObj.__del__()
100100
# is called while manager is still running.
101+
support.gc_collect()
101102
obj = None
102103
support.gc_collect()
103104

Makefile.pre.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ PYTHON_OBJS= \
405405
Python/ast_opt.o \
406406
Python/ast_unparse.o \
407407
Python/bltinmodule.o \
408+
Python/brc.o \
408409
Python/ceval.o \
409410
Python/codecs.o \
410411
Python/compile.o \
@@ -1081,6 +1082,7 @@ PYTHON_HEADERS= \
10811082
$(srcdir)/Include/internal/pycore_atexit.h \
10821083
$(srcdir)/Include/internal/pycore_bitutils.h \
10831084
$(srcdir)/Include/internal/pycore_blocks_output_buffer.h \
1085+
$(srcdir)/Include/internal/pycore_brc.h \
10841086
$(srcdir)/Include/internal/pycore_bytes_methods.h \
10851087
$(srcdir)/Include/internal/pycore_bytesobject.h \
10861088
$(srcdir)/Include/internal/pycore_call.h \

Modules/posixmodule.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,10 @@ PyOS_AfterFork_Child(void)
637637
tstate->native_thread_id = PyThread_get_thread_native_id();
638638
#endif
639639

640+
#ifdef Py_GIL_DISABLED
641+
_Py_brc_after_fork(tstate->interp);
642+
#endif
643+
640644
status = _PyEval_ReInitThreads(tstate);
641645
if (_PyStatus_EXCEPTION(status)) {
642646
goto fatal_error;

Objects/dictobject.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5989,6 +5989,18 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values)
59895989
return make_dict_from_instance_attributes(interp, keys, values);
59905990
}
59915991

5992+
static bool
5993+
has_unique_reference(PyObject *op)
5994+
{
5995+
#ifdef Py_GIL_DISABLED
5996+
return (_Py_IsOwnedByCurrentThread(op) &&
5997+
op->ob_ref_local == 1 &&
5998+
_Py_atomic_load_ssize_relaxed(&op->ob_ref_shared) == 0);
5999+
#else
6000+
return Py_REFCNT(op) == 1;
6001+
#endif
6002+
}
6003+
59926004
// Return true if the dict was dematerialized, false otherwise.
59936005
bool
59946006
_PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)
@@ -6005,7 +6017,9 @@ _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)
60056017
return false;
60066018
}
60076019
assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE));
6008-
if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) {
6020+
if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) ||
6021+
!has_unique_reference((PyObject *)dict))
6022+
{
60096023
return false;
60106024
}
60116025
assert(dict->ma_values);

Objects/object.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/* Generic object operations; and implementation of None */
33

44
#include "Python.h"
5+
#include "pycore_brc.h" // _Py_brc_queue_object()
56
#include "pycore_call.h" // _PyObject_CallNoArgs()
67
#include "pycore_ceval.h" // _Py_EnterRecursiveCallTstate()
78
#include "pycore_context.h" // _PyContextTokenMissing_Type
@@ -344,15 +345,10 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno)
344345
&shared, new_shared));
345346

346347
if (should_queue) {
347-
// TODO: the inter-thread queue is not yet implemented. For now,
348-
// we just merge the refcount here.
349348
#ifdef Py_REF_DEBUG
350349
_Py_IncRefTotal(_PyInterpreterState_GET());
351350
#endif
352-
Py_ssize_t refcount = _Py_ExplicitMergeRefcount(o, -1);
353-
if (refcount == 0) {
354-
_Py_Dealloc(o);
355-
}
351+
_Py_brc_queue_object(o);
356352
}
357353
else if (new_shared == _Py_REF_MERGED) {
358354
// refcount is zero AND merged

PCbuild/_freeze_module.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@
191191
<ClCompile Include="..\Python\ast_opt.c" />
192192
<ClCompile Include="..\Python\ast_unparse.c" />
193193
<ClCompile Include="..\Python\bltinmodule.c" />
194+
<ClCompile Include="..\Python\brc.c" />
194195
<ClCompile Include="..\Python\bootstrap_hash.c" />
195196
<ClCompile Include="..\Python\ceval.c" />
196197
<ClCompile Include="..\Python\codecs.c" />

PCbuild/_freeze_module.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
<ClCompile Include="..\Python\bltinmodule.c">
4747
<Filter>Source Files</Filter>
4848
</ClCompile>
49+
<ClCompile Include="..\Python\brc.c">
50+
<Filter>Python</Filter>
51+
</ClCompile>
4952
<ClCompile Include="..\Objects\boolobject.c">
5053
<Filter>Source Files</Filter>
5154
</ClCompile>

PCbuild/pythoncore.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@
206206
<ClInclude Include="..\Include\internal\pycore_ast_state.h" />
207207
<ClInclude Include="..\Include\internal\pycore_atexit.h" />
208208
<ClInclude Include="..\Include\internal\pycore_bitutils.h" />
209+
<ClInclude Include="..\Include\internal\pycore_brc.h" />
209210
<ClInclude Include="..\Include\internal\pycore_bytes_methods.h" />
210211
<ClInclude Include="..\Include\internal\pycore_bytesobject.h" />
211212
<ClInclude Include="..\Include\internal\pycore_call.h" />
@@ -553,6 +554,7 @@
553554
<ClCompile Include="..\Python\ast_unparse.c" />
554555
<ClCompile Include="..\Python\bltinmodule.c" />
555556
<ClCompile Include="..\Python\bootstrap_hash.c" />
557+
<ClCompile Include="..\Python\brc.c" />
556558
<ClCompile Include="..\Python\ceval.c" />
557559
<ClCompile Include="..\Python\codecs.c" />
558560
<ClCompile Include="..\Python\compile.c" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,9 @@
546546
<ClInclude Include="..\Include\internal\pycore_bitutils.h">
547547
<Filter>Include\internal</Filter>
548548
</ClInclude>
549+
<ClInclude Include="..\Include\internal\pycore_brc.h">
550+
<Filter>Include\internal</Filter>
551+
</ClInclude>
549552
<ClInclude Include="..\Include\internal\pycore_bytes_methods.h">
550553
<Filter>Include\internal</Filter>
551554
</ClInclude>
@@ -1253,6 +1256,9 @@
12531256
<ClCompile Include="..\Python\bltinmodule.c">
12541257
<Filter>Python</Filter>
12551258
</ClCompile>
1259+
<ClCompile Include="..\Python\brc.c">
1260+
<Filter>Python</Filter>
1261+
</ClCompile>
12561262
<ClCompile Include="..\Python\ceval.c">
12571263
<Filter>Python</Filter>
12581264
</ClCompile>

0 commit comments

Comments
 (0)