From 3c8575650ec6336dc08d0e745c4a7ead9b3d9197 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Mon, 4 Oct 2021 22:15:37 -0400 Subject: [PATCH 1/7] Implement specialized opcodes for BINARY_MULTIPLY --- Include/internal/pycore_code.h | 1 + Include/internal/pycore_long.h | 1 + Include/opcode.h | 55 ++++++++++++++++-------------- Lib/opcode.py | 3 ++ Objects/longobject.c | 13 ++++--- Python/ceval.c | 62 +++++++++++++++++++++++++++++++++- Python/opcode_targets.h | 42 +++++++++++------------ Python/specialize.c | 34 +++++++++++++++++++ 8 files changed, 159 insertions(+), 52 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 0b127ed28993b7..fb87959bef0505 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -308,6 +308,7 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); +int _Py_Specialize_BinaryMultiply(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 7336c317c3f2c8..8edc90218547f5 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -35,6 +35,7 @@ static inline PyObject* _PyLong_GetOne(void) { return __PyLong_GetSmallInt_internal(1); } PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); +PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); #ifdef __cplusplus } diff --git a/Include/opcode.h b/Include/opcode.h index 27895255947837..41650e3edfc289 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -141,32 +141,35 @@ extern "C" { #define BINARY_ADD_FLOAT 13 #define BINARY_ADD_UNICODE 14 #define BINARY_ADD_UNICODE_INPLACE_FAST 18 -#define BINARY_SUBSCR_ADAPTIVE 21 -#define BINARY_SUBSCR_LIST_INT 36 -#define BINARY_SUBSCR_TUPLE_INT 38 -#define BINARY_SUBSCR_DICT 39 -#define JUMP_ABSOLUTE_QUICK 40 -#define LOAD_ATTR_ADAPTIVE 41 -#define LOAD_ATTR_SPLIT_KEYS 42 -#define LOAD_ATTR_WITH_HINT 43 -#define LOAD_ATTR_SLOT 44 -#define LOAD_ATTR_MODULE 45 -#define LOAD_GLOBAL_ADAPTIVE 46 -#define LOAD_GLOBAL_MODULE 47 -#define LOAD_GLOBAL_BUILTIN 48 -#define LOAD_METHOD_ADAPTIVE 58 -#define LOAD_METHOD_CACHED 80 -#define LOAD_METHOD_CLASS 81 -#define LOAD_METHOD_MODULE 87 -#define STORE_ATTR_ADAPTIVE 88 -#define STORE_ATTR_SPLIT_KEYS 120 -#define STORE_ATTR_SLOT 122 -#define STORE_ATTR_WITH_HINT 123 -#define LOAD_FAST__LOAD_FAST 127 -#define STORE_FAST__LOAD_FAST 128 -#define LOAD_FAST__LOAD_CONST 134 -#define LOAD_CONST__LOAD_FAST 140 -#define STORE_FAST__STORE_FAST 143 +#define BINARY_MULTIPLY_ADAPTIVE 21 +#define BINARY_MULTIPLY_INT 36 +#define BINARY_MULTIPLY_FLOAT 38 +#define BINARY_SUBSCR_ADAPTIVE 39 +#define BINARY_SUBSCR_LIST_INT 40 +#define BINARY_SUBSCR_TUPLE_INT 41 +#define BINARY_SUBSCR_DICT 42 +#define JUMP_ABSOLUTE_QUICK 43 +#define LOAD_ATTR_ADAPTIVE 44 +#define LOAD_ATTR_SPLIT_KEYS 45 +#define LOAD_ATTR_WITH_HINT 46 +#define LOAD_ATTR_SLOT 47 +#define LOAD_ATTR_MODULE 48 +#define LOAD_GLOBAL_ADAPTIVE 58 +#define LOAD_GLOBAL_MODULE 80 +#define LOAD_GLOBAL_BUILTIN 81 +#define LOAD_METHOD_ADAPTIVE 87 +#define LOAD_METHOD_CACHED 88 +#define LOAD_METHOD_CLASS 120 +#define LOAD_METHOD_MODULE 122 +#define STORE_ATTR_ADAPTIVE 123 +#define STORE_ATTR_SPLIT_KEYS 127 +#define STORE_ATTR_SLOT 128 +#define STORE_ATTR_WITH_HINT 134 +#define LOAD_FAST__LOAD_FAST 140 +#define STORE_FAST__LOAD_FAST 143 +#define LOAD_FAST__LOAD_CONST 149 +#define LOAD_CONST__LOAD_FAST 150 +#define STORE_FAST__STORE_FAST 151 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 5d356746888757..f6f03754313ef0 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -225,6 +225,9 @@ def jabs_op(name, op): "BINARY_ADD_FLOAT", "BINARY_ADD_UNICODE", "BINARY_ADD_UNICODE_INPLACE_FAST", + "BINARY_MULTIPLY_ADAPTIVE", + "BINARY_MULTIPLY_INT", + "BINARY_MULTIPLY_FLOAT", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_LIST_INT", "BINARY_SUBSCR_TUPLE_INT", diff --git a/Objects/longobject.c b/Objects/longobject.c index 33fea6491b73d2..4173244c8c011e 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3593,13 +3593,11 @@ k_lopsided_mul(PyLongObject *a, PyLongObject *b) return NULL; } -static PyObject * -long_mul(PyLongObject *a, PyLongObject *b) +PyObject * +_PyLong_Multiply(PyLongObject *a, PyLongObject *b) { PyLongObject *z; - CHECK_BINOP(a, b); - /* fast path for single-digit multiplication */ if (IS_MEDIUM_VALUE(a) && IS_MEDIUM_VALUE(b)) { stwodigits v = medium_value(a) * medium_value(b); @@ -3616,6 +3614,13 @@ long_mul(PyLongObject *a, PyLongObject *b) return (PyObject *)z; } +static PyObject * +long_mul(PyLongObject *a, PyLongObject *b) +{ + CHECK_BINOP(a, b); + return _PyLong_Multiply(a, b); +} + /* Fast modulo division for single-digit longs. */ static PyObject * fast_mod(PyLongObject *a, PyLongObject *b) diff --git a/Python/ceval.c b/Python/ceval.c index 8f65bb3aec4bc1..0eaab6de05e254 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1929,14 +1929,73 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(BINARY_MULTIPLY): { + PREDICTED(BINARY_MULTIPLY); + STAT_INC(BINARY_MULTIPLY, unquickened); PyObject *right = POP(); PyObject *left = TOP(); PyObject *res = PyNumber_Multiply(left, right); Py_DECREF(left); Py_DECREF(right); SET_TOP(res); - if (res == NULL) + if (res == NULL) { goto error; + } + DISPATCH(); + } + + TARGET(BINARY_MULTIPLY_ADAPTIVE): { + if (oparg == 0) { + PyObject *left = SECOND(); + PyObject *right = TOP(); + next_instr--; + if (_Py_Specialize_BinaryMultiply(left, right, next_instr) < 0) { + goto error; + } + DISPATCH(); + } + else { + STAT_INC(BINARY_MULTIPLY, deferred); + UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1); + STAT_DEC(BINARY_MULTIPLY, unquickened); + JUMP_TO_INSTRUCTION(BINARY_MULTIPLY); + } + } + + TARGET(BINARY_MULTIPLY_INT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyLong_CheckExact(left), BINARY_MULTIPLY); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_MULTIPLY); + STAT_INC(BINARY_MULTIPLY, hit); + record_hit_inline(next_instr, oparg); + PyObject *sum = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_MULTIPLY_FLOAT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyFloat_CheckExact(left), BINARY_MULTIPLY); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_MULTIPLY); + STAT_INC(BINARY_MULTIPLY, hit); + record_hit_inline(next_instr, oparg); + double dprod = ((PyFloatObject *)left)->ob_fval * + ((PyFloatObject *)right)->ob_fval; + PyObject *sum = PyFloat_FromDouble(dprod); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } DISPATCH(); } @@ -4866,6 +4925,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) MISS_WITH_OPARG_COUNTER(BINARY_ADD) +MISS_WITH_OPARG_COUNTER(BINARY_MULTIPLY) binary_subscr_dict_error: { diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index f3bfae545bcd48..b12c30344dbd40 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -20,7 +20,7 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_MULTIPLY_ADAPTIVE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,8 +35,11 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_BINARY_SUBSCR_LIST_INT, + &&TARGET_BINARY_MULTIPLY_INT, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_BINARY_MULTIPLY_FLOAT, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_JUMP_ABSOLUTE_QUICK, @@ -45,9 +48,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, - &&TARGET_LOAD_GLOBAL_ADAPTIVE, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_LOAD_METHOD_ADAPTIVE, + &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_METHOD_CACHED, - &&TARGET_LOAD_METHOD_CLASS, + &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_METHOD_MODULE, - &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_LOAD_METHOD_ADAPTIVE, + &&TARGET_LOAD_METHOD_CACHED, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,38 +119,38 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&TARGET_LOAD_FAST__LOAD_FAST, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_STORE_FAST__STORE_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_MATCH_CLASS, &&_unknown_opcode, &&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 1ab79bf3ea0c5d..9e6cc5189bd895 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -123,6 +123,7 @@ _Py_GetSpecializationStats(void) { err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); err += add_stat_dict(stats, LOAD_METHOD, "load_method"); err += add_stat_dict(stats, BINARY_ADD, "binary_add"); + err += add_stat_dict(stats, BINARY_MULTIPLY, "binary_multiply"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); if (err < 0) { @@ -179,6 +180,7 @@ _Py_PrintSpecializationStats(void) print_stats(out, &_specialization_stats[LOAD_GLOBAL], "load_global"); print_stats(out, &_specialization_stats[LOAD_METHOD], "load_method"); print_stats(out, &_specialization_stats[BINARY_ADD], "binary_add"); + print_stats(out, &_specialization_stats[BINARY_MULTIPLY], "binary_multiply"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); if (out != stderr) { @@ -229,6 +231,7 @@ static uint8_t adaptive_opcodes[256] = { [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE, [BINARY_ADD] = BINARY_ADD_ADAPTIVE, + [BINARY_MULTIPLY] = BINARY_MULTIPLY_ADAPTIVE, [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, }; @@ -239,6 +242,7 @@ static uint8_t cache_requirements[256] = { [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ [BINARY_ADD] = 0, + [BINARY_MULTIPLY] = 0, [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -1195,3 +1199,33 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) assert(!PyErr_Occurred()); return 0; } + +int +_Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) +{ + PyTypeObject *left_type = Py_TYPE(left); + if (left_type != Py_TYPE(right)) { + SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_DIFFERENT_TYPES); + goto fail; + } + if (left_type == &PyLong_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_INT, saturating_start()); + goto success; + } + else if (left_type == &PyFloat_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); + goto success; + } + else { + SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_OTHER); + } +fail: + STAT_INC(BINARY_MULTIPLY, specialization_failure); + assert(!PyErr_Occurred()); + *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); + return 0; +success: + STAT_INC(BINARY_MULTIPLY, specialization_success); + assert(!PyErr_Occurred()); + return 0; +} From 300a0cac94059f92660f6c85fbb72dd368cb0eac Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 5 Oct 2021 03:49:09 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Core and Builtins/2021-10-05-03-49-07.bpo-45367._astoU.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-10-05-03-49-07.bpo-45367._astoU.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-05-03-49-07.bpo-45367._astoU.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-05-03-49-07.bpo-45367._astoU.rst new file mode 100644 index 00000000000000..aae518da1e86f8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-05-03-49-07.bpo-45367._astoU.rst @@ -0,0 +1 @@ +Specialized the ``BINARY_MULTIPLY`` opcode to ``BINARY_MULTIPLY_INT`` and ``BINARY_MULTIPLY_FLOAT`` using the PEP 659 machinery. \ No newline at end of file From c64540af47ff336b074611d33e130e06cc06e5c2 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 5 Oct 2021 09:36:24 -0400 Subject: [PATCH 3/7] Apply changes from code review --- Include/internal/pycore_code.h | 4 ++-- Python/ceval.c | 4 ++-- Python/specialize.c | 7 +++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index fb87959bef0505..a91209b39f654a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -307,8 +307,8 @@ int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *nam int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -int _Py_Specialize_BinaryMultiply(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); +int _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr); +int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Python/ceval.c b/Python/ceval.c index 8299737ad2e561..1958929a1e9d49 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1952,7 +1952,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); PyObject *right = TOP(); DEOPT_IF(!PyLong_CheckExact(left), BINARY_MULTIPLY); - DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_MULTIPLY); + DEOPT_IF(!PyLong_CheckExact(right), BINARY_MULTIPLY); STAT_INC(BINARY_MULTIPLY, hit); record_hit_inline(next_instr, oparg); PyObject *sum = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); @@ -1970,7 +1970,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); PyObject *right = TOP(); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_MULTIPLY); - DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_MULTIPLY); + DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY); STAT_INC(BINARY_MULTIPLY, hit); record_hit_inline(next_instr, oparg); double dprod = ((PyFloatObject *)left)->ob_fval * diff --git a/Python/specialize.c b/Python/specialize.c index 9e6cc5189bd895..d3b626a37464e6 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1203,16 +1203,15 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) { - PyTypeObject *left_type = Py_TYPE(left); - if (left_type != Py_TYPE(right)) { + if (!Py_IS_TYPE(left, Py_TYPE(right))) { SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_DIFFERENT_TYPES); goto fail; } - if (left_type == &PyLong_Type) { + if (PyLong_CheckExact(left)) { *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_INT, saturating_start()); goto success; } - else if (left_type == &PyFloat_Type) { + else if (PyFloat_CheckExact(left)) { *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); goto success; } From 5a44ecc73e3e7adcb7f614f2cecff95dee0dc362 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 5 Oct 2021 14:57:22 -0400 Subject: [PATCH 4/7] Allow float/int mixtures is BINARY_MULTIPLY_FLOAT --- Python/ceval.c | 36 +++++++++++++++++++++++++++++------- Python/specialize.c | 25 ++++++++++++++++--------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 1958929a1e9d49..54e64495b132f2 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1969,18 +1969,40 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(BINARY_MULTIPLY_FLOAT) { PyObject *left = SECOND(); PyObject *right = TOP(); - DEOPT_IF(!PyFloat_CheckExact(left), BINARY_MULTIPLY); - DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY); + double dleft, dright; + if (PyFloat_CheckExact(left)) { + dleft = ((PyFloatObject *)left)->ob_fval; + if (PyFloat_CheckExact(right)) { + dright = ((PyFloatObject *)right)->ob_fval; + } + else if (PyLong_CheckExact(right) + && (((size_t)Py_SIZE(right)) + 1U < 3U)) { + dright = (double)(((stwodigits)Py_SIZE(right)) + * ((PyLongObject *)right)->ob_digit[0]); + } + else { + DEOPT_IF(1, BINARY_MULTIPLY); + } + } + else if (PyLong_CheckExact(left) + && (((size_t)Py_SIZE(left)) + 1U < 3U)) { + DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY); + dleft = (double)(((stwodigits)Py_SIZE(left)) + * ((PyLongObject *)left)->ob_digit[0]); + dright = ((PyFloatObject *)right)->ob_fval; + } + else { + DEOPT_IF(1, BINARY_MULTIPLY); + } STAT_INC(BINARY_MULTIPLY, hit); record_hit_inline(next_instr, oparg); - double dprod = ((PyFloatObject *)left)->ob_fval * - ((PyFloatObject *)right)->ob_fval; - PyObject *sum = PyFloat_FromDouble(dprod); - SET_SECOND(sum); + double dprod = dleft * dright; + PyObject *prod = PyFloat_FromDouble(dprod); + SET_SECOND(prod); Py_DECREF(right); Py_DECREF(left); STACK_SHRINK(1); - if (sum == NULL) { + if (prod == NULL) { goto error; } DISPATCH(); diff --git a/Python/specialize.c b/Python/specialize.c index d3b626a37464e6..c7aa672113241b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -455,6 +455,11 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 +/* Binary Multiply */ +#define SPEC_FAIL_STR_INT 13 +#define SPEC_FAIL_INT_STR 14 +#define SPEC_FAIL_FLOAT_INT 15 +#define SPEC_FAIL_INT_FLOAT 16 static int specialize_module_load_attr( @@ -1203,22 +1208,24 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) { - if (!Py_IS_TYPE(left, Py_TYPE(right))) { - SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_DIFFERENT_TYPES); - goto fail; - } - if (PyLong_CheckExact(left)) { + if (PyLong_CheckExact(left) && PyLong_CheckExact(right)) { *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_INT, saturating_start()); goto success; } - else if (PyFloat_CheckExact(left)) { + else if (PyFloat_CheckExact(left) && PyFloat_CheckExact(right)) { *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); goto success; } - else { - SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_OTHER); + else if (PyFloat_CheckExact(left) || PyFloat_CheckExact(right)) { + if ((PyLong_CheckExact(left) && Py_ABS(Py_SIZE(left)) <= 1) + || (PyLong_CheckExact(right) && Py_ABS(Py_SIZE(right)) <= 1) + ) { + // Mixing float and int => multiply as floats. + *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); + goto success; + } } -fail: + SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_OTHER); STAT_INC(BINARY_MULTIPLY, specialization_failure); assert(!PyErr_Occurred()); *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); From aea424ead30756f11d29e10643ccbfda82567676 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 5 Oct 2021 17:54:03 -0400 Subject: [PATCH 5/7] Minor cleanups and comments --- Python/ceval.c | 10 ++++++---- Python/specialize.c | 5 ----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 54e64495b132f2..ce152740639026 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1973,10 +1973,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr if (PyFloat_CheckExact(left)) { dleft = ((PyFloatObject *)left)->ob_fval; if (PyFloat_CheckExact(right)) { + // float * float dright = ((PyFloatObject *)right)->ob_fval; } else if (PyLong_CheckExact(right) && (((size_t)Py_SIZE(right)) + 1U < 3U)) { + // float * one_digit_int dright = (double)(((stwodigits)Py_SIZE(right)) * ((PyLongObject *)right)->ob_digit[0]); } @@ -1985,8 +1987,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } } else if (PyLong_CheckExact(left) - && (((size_t)Py_SIZE(left)) + 1U < 3U)) { - DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY); + && (((size_t)Py_SIZE(left)) + 1U < 3U) + && PyFloat_CheckExact(right)) { + // one_digit_int * float dleft = (double)(((stwodigits)Py_SIZE(left)) * ((PyLongObject *)left)->ob_digit[0]); dright = ((PyFloatObject *)right)->ob_fval; @@ -1996,8 +1999,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } STAT_INC(BINARY_MULTIPLY, hit); record_hit_inline(next_instr, oparg); - double dprod = dleft * dright; - PyObject *prod = PyFloat_FromDouble(dprod); + PyObject *prod = PyFloat_FromDouble(dleft * dright); SET_SECOND(prod); Py_DECREF(right); Py_DECREF(left); diff --git a/Python/specialize.c b/Python/specialize.c index c7aa672113241b..5d8b0c8486650d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -455,11 +455,6 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 -/* Binary Multiply */ -#define SPEC_FAIL_STR_INT 13 -#define SPEC_FAIL_INT_STR 14 -#define SPEC_FAIL_FLOAT_INT 15 -#define SPEC_FAIL_INT_FLOAT 16 static int specialize_module_load_attr( From 37d371696c779c649872c45d7da8073b60359096 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 7 Oct 2021 10:29:15 -0400 Subject: [PATCH 6/7] Revert the float/int mixtures --- Python/ceval.c | 36 ++++++------------------------------ Python/specialize.c | 20 +++++++++----------- 2 files changed, 15 insertions(+), 41 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index ce152740639026..a2fa8e433b6f64 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1969,42 +1969,18 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(BINARY_MULTIPLY_FLOAT) { PyObject *left = SECOND(); PyObject *right = TOP(); - double dleft, dright; - if (PyFloat_CheckExact(left)) { - dleft = ((PyFloatObject *)left)->ob_fval; - if (PyFloat_CheckExact(right)) { - // float * float - dright = ((PyFloatObject *)right)->ob_fval; - } - else if (PyLong_CheckExact(right) - && (((size_t)Py_SIZE(right)) + 1U < 3U)) { - // float * one_digit_int - dright = (double)(((stwodigits)Py_SIZE(right)) - * ((PyLongObject *)right)->ob_digit[0]); - } - else { - DEOPT_IF(1, BINARY_MULTIPLY); - } - } - else if (PyLong_CheckExact(left) - && (((size_t)Py_SIZE(left)) + 1U < 3U) - && PyFloat_CheckExact(right)) { - // one_digit_int * float - dleft = (double)(((stwodigits)Py_SIZE(left)) - * ((PyLongObject *)left)->ob_digit[0]); - dright = ((PyFloatObject *)right)->ob_fval; - } - else { - DEOPT_IF(1, BINARY_MULTIPLY); - } + DEOPT_IF(!PyFloat_CheckExact(left), BINARY_MULTIPLY); + DEOPT_IF(!PyFloat_CheckExact(right), BINARY_MULTIPLY); STAT_INC(BINARY_MULTIPLY, hit); record_hit_inline(next_instr, oparg); - PyObject *prod = PyFloat_FromDouble(dleft * dright); + double dprod = ((PyFloatObject *)left)->ob_fval * + ((PyFloatObject *)right)->ob_fval; + PyObject *prod = PyFloat_FromDouble(dprod); SET_SECOND(prod); Py_DECREF(right); Py_DECREF(left); STACK_SHRINK(1); - if (prod == NULL) { + if (sum == NULL) { goto error; } DISPATCH(); diff --git a/Python/specialize.c b/Python/specialize.c index 5d8b0c8486650d..d3b626a37464e6 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1203,24 +1203,22 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) int _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) { - if (PyLong_CheckExact(left) && PyLong_CheckExact(right)) { + if (!Py_IS_TYPE(left, Py_TYPE(right))) { + SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_DIFFERENT_TYPES); + goto fail; + } + if (PyLong_CheckExact(left)) { *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_INT, saturating_start()); goto success; } - else if (PyFloat_CheckExact(left) && PyFloat_CheckExact(right)) { + else if (PyFloat_CheckExact(left)) { *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); goto success; } - else if (PyFloat_CheckExact(left) || PyFloat_CheckExact(right)) { - if ((PyLong_CheckExact(left) && Py_ABS(Py_SIZE(left)) <= 1) - || (PyLong_CheckExact(right) && Py_ABS(Py_SIZE(right)) <= 1) - ) { - // Mixing float and int => multiply as floats. - *instr = _Py_MAKECODEUNIT(BINARY_MULTIPLY_FLOAT, saturating_start()); - goto success; - } + else { + SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_OTHER); } - SPECIALIZATION_FAIL(BINARY_MULTIPLY, SPEC_FAIL_OTHER); +fail: STAT_INC(BINARY_MULTIPLY, specialization_failure); assert(!PyErr_Occurred()); *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); From 903ff9c7d66186b9364071f4a43faa7afb86f6c7 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 7 Oct 2021 10:39:46 -0400 Subject: [PATCH 7/7] typo: sum --> prod --- Python/ceval.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index a2fa8e433b6f64..69e15aeb2593cd 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1955,12 +1955,12 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyLong_CheckExact(right), BINARY_MULTIPLY); STAT_INC(BINARY_MULTIPLY, hit); record_hit_inline(next_instr, oparg); - PyObject *sum = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); - SET_SECOND(sum); + PyObject *prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + SET_SECOND(prod); Py_DECREF(right); Py_DECREF(left); STACK_SHRINK(1); - if (sum == NULL) { + if (prod == NULL) { goto error; } DISPATCH(); @@ -1980,7 +1980,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr Py_DECREF(right); Py_DECREF(left); STACK_SHRINK(1); - if (sum == NULL) { + if (prod == NULL) { goto error; } DISPATCH();