From f58aa935bc10eea1446810e32c56e405e015160f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 27 Aug 2023 04:09:28 +0100 Subject: [PATCH 1/5] Remove non-debugging used of '#if TIER_ONE' from _POP_FRAME. --- Python/bytecodes.c | 36 ++++++++++----- Python/ceval.c | 2 +- Python/ceval_macros.h | 45 ++++++++++++++++++ Python/executor.c | 2 +- Python/executor_cases.c.h | 36 ++++++++++----- Python/generated_cases.c.h | 76 ++++++++++++++++++++++--------- Tools/cases_generator/stacking.py | 4 +- 7 files changed, 153 insertions(+), 48 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 93926c03421eb7..0715231ebb49d2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -768,24 +768,38 @@ dummy_func( // different frame, and it's accounted for by _PUSH_FRAME. op(_POP_FRAME, (retval --)) { assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); + STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; - #if TIER_ONE - assert(frame != &entry_frame); - #endif frame = tstate->current_frame = dying->previous; _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); - #if TIER_ONE - goto resume_frame; - #endif - #if TIER_TWO - stack_pointer = _PyFrame_GetStackPointer(frame); - ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; - #endif + LOAD_SP(); + LOAD_IP(); + +#if LLTRACE && TIER_ONE + { + if (frame != &entry_frame && GLOBALS()) { + int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); + if (r < 0) { + goto exit_unwind; + } + lltrace = r; + if (!lltrace) { + // When tracing executed uops, also trace bytecode + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that + } + } + } + if (lltrace) { + lltrace_resume_frame(frame); + } + } +#endif } macro(RETURN_VALUE) = diff --git a/Python/ceval.c b/Python/ceval.c index a56d31ea073639..b45f769530a92a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -576,6 +576,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) return _PyEval_EvalFrame(tstate, f->f_frame, throwflag); } +#define TIER_ONE 1 #include "ceval_macros.h" @@ -752,7 +753,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int #endif { -#define TIER_ONE 1 #include "generated_cases.c.h" /* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c, diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 635b8e501e523e..b35a71d7726dca 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -373,3 +373,48 @@ static inline int _Py_EnterRecursivePy(PyThreadState *tstate) { static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { tstate->py_recursion_remaining++; } + + +/* Implementation of "macros" that modify the instruction pointer, + * stack pointer, or frame pointer. + * These need to treated differently by tier 1 and 2. */ + +#if TIER_ONE + +#define STORE_IP() \ +do {frame->prev_instr = next_instr-1; } while (0) + + +#define LOAD_IP() \ +do { next_instr = frame->prev_instr+1; } while (0) + +#define STORE_SP() \ +_PyFrame_SetStackPointer(frame, stack_pointer) + +#define LOAD_SP() \ +stack_pointer = _PyFrame_GetStackPointer(frame); + +#endif + + +#if TIER_TWO + +#define STORE_IP() \ +do {frame->prev_instr = next_instr-1; } while (0) + + +#define LOAD_IP() \ +do { ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; } while (0) + +#define STORE_SP() \ +_PyFrame_SetStackPointer(frame, stack_pointer) + + +#define LOAD_SP() \ +stack_pointer = _PyFrame_GetStackPointer(frame); + +#endif + + + + diff --git a/Python/executor.c b/Python/executor.c index 0ff5106fe446ff..9b3262ed4165f1 100644 --- a/Python/executor.c +++ b/Python/executor.c @@ -17,6 +17,7 @@ #include "pycore_sliceobject.h" #include "pycore_uops.h" +#define TIER_TWO 2 #include "ceval_macros.h" @@ -83,7 +84,6 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject OBJECT_STAT_INC(optimization_uops_executed); switch (opcode) { -#define TIER_TWO 2 #include "executor_cases.c.h" default: diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1283cc7ebbf9c4..1d02428928b940 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -690,24 +690,38 @@ retval = stack_pointer[-1]; STACK_SHRINK(1); assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); + STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; - #if TIER_ONE - assert(frame != &entry_frame); - #endif frame = tstate->current_frame = dying->previous; _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); - #if TIER_ONE - goto resume_frame; - #endif - #if TIER_TWO - stack_pointer = _PyFrame_GetStackPointer(frame); - ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; - #endif + LOAD_SP(); + LOAD_IP(); + +#if LLTRACE && TIER_ONE + { + if (frame != &entry_frame && GLOBALS()) { + int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); + if (r < 0) { + goto exit_unwind; + } + lltrace = r; + if (!lltrace) { + // When tracing executed uops, also trace bytecode + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that + } + } + } + if (lltrace) { + lltrace_resume_frame(frame); + } + } +#endif break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 5940c185817604..96312f2a4537f4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -990,25 +990,40 @@ STACK_SHRINK(1); { assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); + STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; - #if TIER_ONE - assert(frame != &entry_frame); - #endif frame = tstate->current_frame = dying->previous; _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); - #if TIER_ONE - goto resume_frame; - #endif - #if TIER_TWO - stack_pointer = _PyFrame_GetStackPointer(frame); - ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; - #endif + LOAD_SP(); + LOAD_IP(); + + #if LLTRACE && TIER_ONE + { + if (frame != &entry_frame && GLOBALS()) { + int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); + if (r < 0) { + goto exit_unwind; + } + lltrace = r; + if (!lltrace) { + // When tracing executed uops, also trace bytecode + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that + } + } + } + if (lltrace) { + lltrace_resume_frame(frame); + } + } + #endif } + DISPATCH(); } TARGET(INSTRUMENTED_RETURN_VALUE) { @@ -1055,25 +1070,40 @@ retval = value; { assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); + STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; - #if TIER_ONE - assert(frame != &entry_frame); - #endif frame = tstate->current_frame = dying->previous; _PyEval_FrameClearAndPop(tstate, dying); frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); - #if TIER_ONE - goto resume_frame; - #endif - #if TIER_TWO - stack_pointer = _PyFrame_GetStackPointer(frame); - ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; - #endif + LOAD_SP(); + LOAD_IP(); + + #if LLTRACE && TIER_ONE + { + if (frame != &entry_frame && GLOBALS()) { + int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); + if (r < 0) { + goto exit_unwind; + } + lltrace = r; + if (!lltrace) { + // When tracing executed uops, also trace bytecode + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that + } + } + } + if (lltrace) { + lltrace_resume_frame(frame); + } + } + #endif } + DISPATCH(); } TARGET(INSTRUMENTED_RETURN_CONST) { @@ -3887,6 +3917,7 @@ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif } + DISPATCH(); } TARGET(CALL_PY_EXACT_ARGS) { @@ -3963,6 +3994,7 @@ ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; #endif } + DISPATCH(); } TARGET(CALL_PY_WITH_DEFAULTS) { diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 2d006ba8e5934a..dcdfc7ec054248 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -369,8 +369,8 @@ def write_macro_instr( next_instr_is_set = write_components(parts, out, TIER_ONE, mac.cache_offset) except AssertionError as err: raise AssertionError(f"Error writing macro {mac.name}") from err - if not parts[-1].instr.always_exits and not next_instr_is_set: - if mac.cache_offset: + if not parts[-1].instr.always_exits: + if not next_instr_is_set and mac.cache_offset: out.emit(f"next_instr += {mac.cache_offset};") out.emit("DISPATCH();") From b142c56073fcb9c8be45177f9b9de28b1849ca7b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 27 Aug 2023 09:44:10 +0100 Subject: [PATCH 2/5] Tidy up lltrace code a bit --- Python/bytecodes.c | 21 +++------------- Python/ceval.c | 49 ++++++++++++++++++++++++-------------- Python/executor_cases.c.h | 21 +++------------- Python/generated_cases.c.h | 42 +++++--------------------------- 4 files changed, 43 insertions(+), 90 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0715231ebb49d2..1dd9f43f33191e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -780,24 +780,9 @@ dummy_func( LOAD_IP(); #if LLTRACE && TIER_ONE - { - if (frame != &entry_frame && GLOBALS()) { - int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); - if (r < 0) { - goto exit_unwind; - } - lltrace = r; - if (!lltrace) { - // When tracing executed uops, also trace bytecode - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that - } - } - } - if (lltrace) { - lltrace_resume_frame(frame); - } + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; } #endif } diff --git a/Python/ceval.c b/Python/ceval.c index b45f769530a92a..a22852ec13ea99 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -189,6 +189,34 @@ lltrace_resume_frame(_PyInterpreterFrame *frame) fflush(stdout); PyErr_SetRaisedException(exc); } + +static int +maybe_lltrace_resume_frame(_PyInterpreterFrame *frame, _PyInterpreterFrame *skip_frame, PyObject *globals) +{ + if (globals == NULL) { + return 0; + } + if (frame == skip_frame) { + return 0; + } + int r = PyDict_Contains(globals, &_Py_ID(__lltrace__)); + if (r < 0) { + return -1; + } + int lltrace = r; + if (!lltrace) { + // When tracing executed uops, also trace bytecode + char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); + if (uop_debug != NULL && *uop_debug >= '0') { + lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that + } + } + if (lltrace) { + lltrace_resume_frame(frame); + } + return lltrace; +} + #endif static void monitor_raise(PyThreadState *tstate, @@ -715,24 +743,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int SET_LOCALS_FROM_FRAME(); #ifdef LLTRACE - { - if (frame != &entry_frame && GLOBALS()) { - int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); - if (r < 0) { - goto exit_unwind; - } - lltrace = r; - if (!lltrace) { - // When tracing executed uops, also trace bytecode - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that - } - } - } - if (lltrace) { - lltrace_resume_frame(frame); - } + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; } #endif diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1d02428928b940..3aebd5a38183ef 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -702,24 +702,9 @@ LOAD_IP(); #if LLTRACE && TIER_ONE - { - if (frame != &entry_frame && GLOBALS()) { - int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); - if (r < 0) { - goto exit_unwind; - } - lltrace = r; - if (!lltrace) { - // When tracing executed uops, also trace bytecode - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that - } - } - } - if (lltrace) { - lltrace_resume_frame(frame); - } + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; } #endif break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 96312f2a4537f4..54056eea4834e2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1002,24 +1002,9 @@ LOAD_IP(); #if LLTRACE && TIER_ONE - { - if (frame != &entry_frame && GLOBALS()) { - int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); - if (r < 0) { - goto exit_unwind; - } - lltrace = r; - if (!lltrace) { - // When tracing executed uops, also trace bytecode - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that - } - } - } - if (lltrace) { - lltrace_resume_frame(frame); - } + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; } #endif } @@ -1082,24 +1067,9 @@ LOAD_IP(); #if LLTRACE && TIER_ONE - { - if (frame != &entry_frame && GLOBALS()) { - int r = PyDict_Contains(GLOBALS(), &_Py_ID(__lltrace__)); - if (r < 0) { - goto exit_unwind; - } - lltrace = r; - if (!lltrace) { - // When tracing executed uops, also trace bytecode - char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); - if (uop_debug != NULL && *uop_debug >= '0') { - lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that - } - } - } - if (lltrace) { - lltrace_resume_frame(frame); - } + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; } #endif } From 2e586ab10371c54dd2d01f63788ea18510418d9d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 27 Aug 2023 09:48:31 +0100 Subject: [PATCH 3/5] Add back assert --- Python/bytecodes.c | 3 +++ Python/executor_cases.c.h | 3 +++ Python/generated_cases.c.h | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1dd9f43f33191e..57a95ed840cc9e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -768,6 +768,9 @@ dummy_func( // different frame, and it's accounted for by _PUSH_FRAME. op(_POP_FRAME, (retval --)) { assert(EMPTY()); + #if TIER_ONE + assert(frame != &entry_frame); + #endif STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3aebd5a38183ef..bc972420cf0963 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -690,6 +690,9 @@ retval = stack_pointer[-1]; STACK_SHRINK(1); assert(EMPTY()); + #if TIER_ONE + assert(frame != &entry_frame); + #endif STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 54056eea4834e2..ca7706ee7b26b0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -990,6 +990,9 @@ STACK_SHRINK(1); { assert(EMPTY()); + #if TIER_ONE + assert(frame != &entry_frame); + #endif STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: @@ -1055,6 +1058,9 @@ retval = value; { assert(EMPTY()); + #if TIER_ONE + assert(frame != &entry_frame); + #endif STORE_SP(); _Py_LeaveRecursiveCallPy(tstate); // GH-99729: We need to unlink the frame *before* clearing it: From c989bbef80b37604b498b54ce37bfc16f066c5b1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 27 Aug 2023 09:54:08 +0100 Subject: [PATCH 4/5] Fix lint rule --- Python/bytecodes.c | 1 - Python/executor_cases.c.h | 1 - Python/generated_cases.c.h | 2 -- 3 files changed, 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 57a95ed840cc9e..7f398391c5dc34 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -781,7 +781,6 @@ dummy_func( _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(); - #if LLTRACE && TIER_ONE lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); if (lltrace < 0) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index bc972420cf0963..e63a36d61579a4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -703,7 +703,6 @@ _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(); - #if LLTRACE && TIER_ONE lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); if (lltrace < 0) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ca7706ee7b26b0..a7bf20b9d1c7f6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1003,7 +1003,6 @@ _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(); - #if LLTRACE && TIER_ONE lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); if (lltrace < 0) { @@ -1071,7 +1070,6 @@ _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(); - #if LLTRACE && TIER_ONE lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); if (lltrace < 0) { From 48a470f6706eb560694daca0528f9d9fec3db799 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 28 Aug 2023 00:10:03 +0100 Subject: [PATCH 5/5] Remove unused macro --- Python/ceval_macros.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index b35a71d7726dca..4b7c4448e0ea25 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -381,10 +381,6 @@ static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { #if TIER_ONE -#define STORE_IP() \ -do {frame->prev_instr = next_instr-1; } while (0) - - #define LOAD_IP() \ do { next_instr = frame->prev_instr+1; } while (0) @@ -399,17 +395,12 @@ stack_pointer = _PyFrame_GetStackPointer(frame); #if TIER_TWO -#define STORE_IP() \ -do {frame->prev_instr = next_instr-1; } while (0) - - #define LOAD_IP() \ do { ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; } while (0) #define STORE_SP() \ _PyFrame_SetStackPointer(frame, stack_pointer) - #define LOAD_SP() \ stack_pointer = _PyFrame_GetStackPointer(frame);