Skip to content

gh-115999: Add partial free-thread specialization for BINARY_SUBSCR #127227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {

PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
extern void _PyList_DebugMallocStats(FILE *out);
extern PyObject* _PyList_GetItemRef(PyObject *, Py_ssize_t i);

#define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)

Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

103 changes: 75 additions & 28 deletions Lib/test/test_opcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -1229,48 +1229,48 @@ class TestSpecializer(TestBase):
@cpython_only
@requires_specialization_ft
def test_binary_op(self):
def f():
def binary_op_add_int():
for _ in range(100):
a, b = 1, 2
c = a + b
self.assertEqual(c, 3)

f()
self.assert_specialized(f, "BINARY_OP_ADD_INT")
self.assert_no_opcode(f, "BINARY_OP")
binary_op_add_int()
self.assert_specialized(binary_op_add_int, "BINARY_OP_ADD_INT")
self.assert_no_opcode(binary_op_add_int, "BINARY_OP")

def g():
def binary_op_add_unicode():
for _ in range(100):
a, b = "foo", "bar"
c = a + b
self.assertEqual(c, "foobar")

g()
self.assert_specialized(g, "BINARY_OP_ADD_UNICODE")
self.assert_no_opcode(g, "BINARY_OP")
binary_op_add_unicode()
self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE")
self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP")

@cpython_only
@requires_specialization_ft
def test_contain_op(self):
def f():
def contains_op_dict():
for _ in range(100):
a, b = 1, {1: 2, 2: 5}
self.assertTrue(a in b)
self.assertFalse(3 in b)

f()
self.assert_specialized(f, "CONTAINS_OP_DICT")
self.assert_no_opcode(f, "CONTAINS_OP")
contains_op_dict()
self.assert_specialized(contains_op_dict, "CONTAINS_OP_DICT")
self.assert_no_opcode(contains_op_dict, "CONTAINS_OP")

def g():
def contains_op_set():
for _ in range(100):
a, b = 1, {1, 2}
self.assertTrue(a in b)
self.assertFalse(3 in b)

g()
self.assert_specialized(g, "CONTAINS_OP_SET")
self.assert_no_opcode(g, "CONTAINS_OP")
contains_op_set()
self.assert_specialized(contains_op_set, "CONTAINS_OP_SET")
self.assert_no_opcode(contains_op_set, "CONTAINS_OP")

@cpython_only
@requires_specialization_ft
Expand Down Expand Up @@ -1342,34 +1342,81 @@ def to_bool_str():
@cpython_only
@requires_specialization_ft
def test_unpack_sequence(self):
def f():
def unpack_sequence_two_tuple():
for _ in range(100):
a, b = 1, 2
self.assertEqual(a, 1)
self.assertEqual(b, 2)

f()
self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE")
self.assert_no_opcode(f, "UNPACK_SEQUENCE")
unpack_sequence_two_tuple()
self.assert_specialized(unpack_sequence_two_tuple,
"UNPACK_SEQUENCE_TWO_TUPLE")
self.assert_no_opcode(unpack_sequence_two_tuple, "UNPACK_SEQUENCE")

def g():
def unpack_sequence_tuple():
for _ in range(100):
a, = 1,
self.assertEqual(a, 1)

g()
self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE")
self.assert_no_opcode(g, "UNPACK_SEQUENCE")
unpack_sequence_tuple()
self.assert_specialized(unpack_sequence_tuple, "UNPACK_SEQUENCE_TUPLE")
self.assert_no_opcode(unpack_sequence_tuple, "UNPACK_SEQUENCE")

def x():
def unpack_sequence_list():
for _ in range(100):
a, b = [1, 2]
self.assertEqual(a, 1)
self.assertEqual(b, 2)

x()
self.assert_specialized(x, "UNPACK_SEQUENCE_LIST")
self.assert_no_opcode(x, "UNPACK_SEQUENCE")
unpack_sequence_list()
self.assert_specialized(unpack_sequence_list, "UNPACK_SEQUENCE_LIST")
self.assert_no_opcode(unpack_sequence_list, "UNPACK_SEQUENCE")

@cpython_only
@requires_specialization_ft
def test_binary_subscr(self):
def binary_subscr_list_int():
for _ in range(100):
a = [1, 2, 3]
for idx, expected in enumerate(a):
self.assertEqual(a[idx], expected)

binary_subscr_list_int()
self.assert_specialized(binary_subscr_list_int,
"BINARY_SUBSCR_LIST_INT")
self.assert_no_opcode(binary_subscr_list_int, "BINARY_SUBSCR")

def binary_subscr_tuple_int():
for _ in range(100):
a = (1, 2, 3)
for idx, expected in enumerate(a):
self.assertEqual(a[idx], expected)

binary_subscr_tuple_int()
self.assert_specialized(binary_subscr_tuple_int,
"BINARY_SUBSCR_TUPLE_INT")
self.assert_no_opcode(binary_subscr_tuple_int, "BINARY_SUBSCR")

def binary_subscr_dict():
for _ in range(100):
a = {1: 2, 2: 3}
self.assertEqual(a[1], 2)
self.assertEqual(a[2], 3)

binary_subscr_dict()
self.assert_specialized(binary_subscr_dict, "BINARY_SUBSCR_DICT")
self.assert_no_opcode(binary_subscr_dict, "BINARY_SUBSCR")

def binary_subscr_str_int():
for _ in range(100):
a = "foobar"
for idx, expected in enumerate(a):
self.assertEqual(a[idx], expected)

binary_subscr_str_int()
self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT")
self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR")


if __name__ == "__main__":
unittest.main()
6 changes: 6 additions & 0 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,12 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i)
return item;
}

PyObject *
_PyList_GetItemRef(PyObject *op, Py_ssize_t i)
{
return list_get_item_ref((PyListObject *)op, i);
}

int
PyList_SetItem(PyObject *op, Py_ssize_t i,
PyObject *newitem)
Expand Down
10 changes: 8 additions & 2 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,7 @@ dummy_func(
};

specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) {
#if ENABLE_SPECIALIZATION
#if ENABLE_SPECIALIZATION_FT
assert(frame->stackpointer == NULL);
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
Expand All @@ -713,7 +713,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(BINARY_SUBSCR);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
#endif /* ENABLE_SPECIALIZATION */
#endif /* ENABLE_SPECIALIZATION_FT */
}

op(_BINARY_SUBSCR, (container, sub -- res)) {
Expand Down Expand Up @@ -790,11 +790,17 @@ dummy_func(
// Deopt unless 0 <= sub < PyList_Size(list)
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
#ifdef Py_GIL_DISABLED
STAT_INC(BINARY_SUBSCR, hit);
PyObject *res_o = _PyList_GetItemRef(list, index);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default build of list_get_item_ref is almost same as current implementation,
we might able to this implementation to the default build also.

#else
static inline PyObject*
list_get_item_ref(PyListObject *op, Py_ssize_t i)
{
if (!valid_index(i, Py_SIZE(op))) {
return NULL;
}
return Py_NewRef(PyList_GET_ITEM(op, i));
}
#endif

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would certainly be cleaner, but it might come with some performance cost in the default build.

DEOPT_IF(res_o == NULL);
#else
DEOPT_IF(index >= PyList_GET_SIZE(list));
STAT_INC(BINARY_SUBSCR, hit);
PyObject *res_o = PyList_GET_ITEM(list, index);
assert(res_o != NULL);
Py_INCREF(res_o);
#endif
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free);
DEAD(sub_st);
PyStackRef_CLOSE(list_st);
Expand Down
11 changes: 11 additions & 0 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 11 additions & 14 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -1717,15 +1717,15 @@ _Py_Specialize_BinarySubscr(
PyObject *container = PyStackRef_AsPyObjectBorrow(container_st);
PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st);

assert(ENABLE_SPECIALIZATION);
assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[BINARY_SUBSCR] ==
INLINE_CACHE_ENTRIES_BINARY_SUBSCR);
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1);
PyTypeObject *container_type = Py_TYPE(container);
uint8_t specialized_op;
if (container_type == &PyList_Type) {
if (PyLong_CheckExact(sub)) {
if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) {
instr->op.code = BINARY_SUBSCR_LIST_INT;
specialized_op = BINARY_SUBSCR_LIST_INT;
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE);
Expand All @@ -1738,7 +1738,7 @@ _Py_Specialize_BinarySubscr(
if (container_type == &PyTuple_Type) {
if (PyLong_CheckExact(sub)) {
if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) {
instr->op.code = BINARY_SUBSCR_TUPLE_INT;
specialized_op = BINARY_SUBSCR_TUPLE_INT;
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE);
Expand All @@ -1751,7 +1751,7 @@ _Py_Specialize_BinarySubscr(
if (container_type == &PyUnicode_Type) {
if (PyLong_CheckExact(sub)) {
if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) {
instr->op.code = BINARY_SUBSCR_STR_INT;
specialized_op = BINARY_SUBSCR_STR_INT;
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE);
Expand All @@ -1762,9 +1762,10 @@ _Py_Specialize_BinarySubscr(
goto fail;
}
if (container_type == &PyDict_Type) {
instr->op.code = BINARY_SUBSCR_DICT;
specialized_op = BINARY_SUBSCR_DICT;
goto success;
}
#ifndef Py_GIL_DISABLED
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__));
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
Expand Down Expand Up @@ -1797,21 +1798,17 @@ _Py_Specialize_BinarySubscr(
// struct _specialization_cache):
ht->_spec_cache.getitem = descriptor;
ht->_spec_cache.getitem_version = version;
Comment on lines 1799 to 1800
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should make to be thread-safe by using LOCK_TYPE(), but it will be handled with separated PRs.

instr->op.code = BINARY_SUBSCR_GETITEM;
specialized_op = BINARY_SUBSCR_GETITEM;
goto success;
}
#endif // Py_GIL_DISABLED
SPECIALIZATION_FAIL(BINARY_SUBSCR,
binary_subscr_fail_kind(container_type, sub));
fail:
STAT_INC(BINARY_SUBSCR, failure);
assert(!PyErr_Occurred());
instr->op.code = BINARY_SUBSCR;
cache->counter = adaptive_counter_backoff(cache->counter);
unspecialize(instr);
return;
success:
STAT_INC(BINARY_SUBSCR, success);
assert(!PyErr_Occurred());
cache->counter = adaptive_counter_cooldown();
specialize(instr, specialized_op);
}


Expand Down
Loading