Skip to content

Commit bcce5e2

Browse files
authored
gh-109039: Branch prediction for Tier 2 interpreter (#109038)
This adds a 16-bit inline cache entry to the conditional branch instructions POP_JUMP_IF_{FALSE,TRUE,NONE,NOT_NONE} and their instrumented variants, which is used to keep track of the branch direction. Each time we encounter these instructions we shift the cache entry left by one and set the bottom bit to whether we jumped. Then when it's time to translate such a branch to Tier 2 uops, we use the bit count from the cache entry to decide whether to continue translating the "didn't jump" branch or the "jumped" branch. The counter is initialized to a pattern of alternating ones and zeros to avoid bias. The .pyc file magic number is updated. There's a new test, some fixes for existing tests, and a few miscellaneous cleanups.
1 parent ecd21a6 commit bcce5e2

File tree

15 files changed

+339
-181
lines changed

15 files changed

+339
-181
lines changed

Include/internal/pycore_instruments.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
# error "this header requires Py_BUILD_CORE define"
66
#endif
77

8-
#include "pycore_bitutils.h" // _Py_popcount32
98
#include "pycore_frame.h" // _PyInterpreterFrame
109

1110
#ifdef __cplusplus

Include/internal/pycore_opcode_metadata.h

Lines changed: 13 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/importlib/_bootstrap_external.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ def _write_atomic(path, data, mode=0o666):
456456
# Python 3.13a1 3558 (Reorder the stack items for CALL)
457457
# Python 3.13a1 3559 (Generate opcode IDs from bytecodes.c)
458458
# Python 3.13a1 3560 (Add RESUME_CHECK instruction)
459+
# Python 3.13a1 3561 (Add cache entry to branch instructions)
459460

460461
# Python 3.14 will start with 3600
461462

@@ -472,7 +473,7 @@ def _write_atomic(path, data, mode=0o666):
472473
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
473474
# in PC/launcher.c must also be updated.
474475

475-
MAGIC_NUMBER = (3560).to_bytes(2, 'little') + b'\r\n'
476+
MAGIC_NUMBER = (3561).to_bytes(2, 'little') + b'\r\n'
476477

477478
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
478479

Lib/opcode.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,18 @@
9393
"counter": 1,
9494
"version": 2,
9595
},
96+
"POP_JUMP_IF_TRUE": {
97+
"counter": 1,
98+
},
99+
"POP_JUMP_IF_FALSE": {
100+
"counter": 1,
101+
},
102+
"POP_JUMP_IF_NONE": {
103+
"counter": 1,
104+
},
105+
"POP_JUMP_IF_NOT_NONE": {
106+
"counter": 1,
107+
},
96108
}
97109

98110
_inline_cache_entries = {

Lib/test/support/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
"LOOPBACK_TIMEOUT", "INTERNET_TIMEOUT", "SHORT_TIMEOUT", "LONG_TIMEOUT",
6363
"Py_DEBUG", "EXCEEDS_RECURSION_LIMIT", "Py_C_RECURSION_LIMIT",
6464
"skip_on_s390x",
65+
"without_optimizer",
6566
]
6667

6768

@@ -2533,3 +2534,19 @@ def adjust_int_max_str_digits(max_digits):
25332534
'skipped on s390x')
25342535

25352536
Py_TRACE_REFS = hasattr(sys, 'getobjects')
2537+
2538+
# Decorator to disable the optimizer while a function runs
2539+
def without_optimizer(func):
2540+
try:
2541+
import _testinternalcapi
2542+
except ImportError:
2543+
return func
2544+
@functools.wraps(func)
2545+
def wrapper(*args, **kwargs):
2546+
save_opt = _testinternalcapi.get_optimizer()
2547+
try:
2548+
_testinternalcapi.set_optimizer(None)
2549+
return func(*args, **kwargs)
2550+
finally:
2551+
_testinternalcapi.set_optimizer(save_opt)
2552+
return wrapper

Lib/test/test_capi/test_misc.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,7 +2455,7 @@ def testfunc(x):
24552455
opt = _testinternalcapi.get_uop_optimizer()
24562456

24572457
with temporary_optimizer(opt):
2458-
testfunc(10)
2458+
testfunc(20)
24592459

24602460
ex = get_first_executor(testfunc)
24612461
self.assertIsNotNone(ex)
@@ -2470,7 +2470,7 @@ def testfunc(n):
24702470

24712471
opt = _testinternalcapi.get_uop_optimizer()
24722472
with temporary_optimizer(opt):
2473-
testfunc(10)
2473+
testfunc(20)
24742474

24752475
ex = get_first_executor(testfunc)
24762476
self.assertIsNotNone(ex)
@@ -2485,7 +2485,7 @@ def testfunc(a):
24852485

24862486
opt = _testinternalcapi.get_uop_optimizer()
24872487
with temporary_optimizer(opt):
2488-
testfunc(range(10))
2488+
testfunc(range(20))
24892489

24902490
ex = get_first_executor(testfunc)
24912491
self.assertIsNotNone(ex)
@@ -2495,12 +2495,13 @@ def testfunc(a):
24952495
def test_pop_jump_if_not_none(self):
24962496
def testfunc(a):
24972497
for x in a:
2498+
x = None
24982499
if x is not None:
24992500
x = 0
25002501

25012502
opt = _testinternalcapi.get_uop_optimizer()
25022503
with temporary_optimizer(opt):
2503-
testfunc(range(10))
2504+
testfunc(range(20))
25042505

25052506
ex = get_first_executor(testfunc)
25062507
self.assertIsNotNone(ex)
@@ -2515,7 +2516,7 @@ def testfunc(n):
25152516

25162517
opt = _testinternalcapi.get_uop_optimizer()
25172518
with temporary_optimizer(opt):
2518-
testfunc(10)
2519+
testfunc(20)
25192520

25202521
ex = get_first_executor(testfunc)
25212522
self.assertIsNotNone(ex)
@@ -2530,7 +2531,7 @@ def testfunc(n):
25302531

25312532
opt = _testinternalcapi.get_uop_optimizer()
25322533
with temporary_optimizer(opt):
2533-
testfunc(10)
2534+
testfunc(20)
25342535

25352536
ex = get_first_executor(testfunc)
25362537
self.assertIsNotNone(ex)
@@ -2550,7 +2551,7 @@ def testfunc(n):
25502551

25512552
opt = _testinternalcapi.get_uop_optimizer()
25522553
with temporary_optimizer(opt):
2553-
testfunc(10)
2554+
testfunc(20)
25542555

25552556
ex = get_first_executor(testfunc)
25562557
self.assertIsNotNone(ex)
@@ -2568,8 +2569,8 @@ def testfunc(n):
25682569

25692570
opt = _testinternalcapi.get_uop_optimizer()
25702571
with temporary_optimizer(opt):
2571-
total = testfunc(10)
2572-
self.assertEqual(total, 45)
2572+
total = testfunc(20)
2573+
self.assertEqual(total, 190)
25732574

25742575
ex = get_first_executor(testfunc)
25752576
self.assertIsNotNone(ex)
@@ -2589,9 +2590,9 @@ def testfunc(a):
25892590

25902591
opt = _testinternalcapi.get_uop_optimizer()
25912592
with temporary_optimizer(opt):
2592-
a = list(range(10))
2593+
a = list(range(20))
25932594
total = testfunc(a)
2594-
self.assertEqual(total, 45)
2595+
self.assertEqual(total, 190)
25952596

25962597
ex = get_first_executor(testfunc)
25972598
self.assertIsNotNone(ex)
@@ -2611,9 +2612,9 @@ def testfunc(a):
26112612

26122613
opt = _testinternalcapi.get_uop_optimizer()
26132614
with temporary_optimizer(opt):
2614-
a = tuple(range(10))
2615+
a = tuple(range(20))
26152616
total = testfunc(a)
2616-
self.assertEqual(total, 45)
2617+
self.assertEqual(total, 190)
26172618

26182619
ex = get_first_executor(testfunc)
26192620
self.assertIsNotNone(ex)
@@ -2647,14 +2648,30 @@ def dummy(x):
26472648

26482649
opt = _testinternalcapi.get_uop_optimizer()
26492650
with temporary_optimizer(opt):
2650-
testfunc(10)
2651+
testfunc(20)
26512652

26522653
ex = get_first_executor(testfunc)
26532654
self.assertIsNotNone(ex)
26542655
uops = {opname for opname, _, _ in ex}
26552656
self.assertIn("_PUSH_FRAME", uops)
26562657
self.assertIn("_BINARY_OP_ADD_INT", uops)
26572658

2659+
def test_branch_taken(self):
2660+
def testfunc(n):
2661+
for i in range(n):
2662+
if i < 0:
2663+
i = 0
2664+
else:
2665+
i = 1
2666+
2667+
opt = _testinternalcapi.get_uop_optimizer()
2668+
with temporary_optimizer(opt):
2669+
testfunc(20)
2670+
2671+
ex = get_first_executor(testfunc)
2672+
self.assertIsNotNone(ex)
2673+
uops = {opname for opname, _, _ in ex}
2674+
self.assertIn("_POP_JUMP_IF_TRUE", uops)
26582675

26592676

26602677
if __name__ == "__main__":

0 commit comments

Comments
 (0)