Skip to content

bpo-43950: Initial base implementation for PEP 657 #26955

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,13 @@ struct PyCodeObject {
PyObject *co_localspluskinds; /* Bytes mapping to local kinds (one byte per variable) */
PyObject *co_filename; /* unicode (where it was loaded from) */
PyObject *co_name; /* unicode (name, for reference) */
PyObject *co_linetable; /* string (encoding addr<->lineno mapping) See
PyObject *co_linetable; /* bytes (encoding addr<->lineno mapping) See
Objects/lnotab_notes.txt for details. */
PyObject *co_endlinetable; /* bytes object that holds end lineno for
instructions separated across different
lines */
PyObject *co_columntable; /* bytes object that holds start/end column
offsets for each instruction */

/* These fields are set with computed values on new code objects. */

Expand Down Expand Up @@ -149,12 +154,14 @@ PyAPI_DATA(PyTypeObject) PyCode_Type;
PyAPI_FUNC(PyCodeObject *) PyCode_New(
int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
PyObject *, PyObject *, int, PyObject *, PyObject *);
PyObject *, PyObject *, int, PyObject *, PyObject *,
PyObject *, PyObject *);

PyAPI_FUNC(PyCodeObject *) PyCode_NewWithPosOnlyArgs(
int, int, int, int, int, int, PyObject *, PyObject *,
PyObject *, PyObject *, PyObject *, PyObject *,
PyObject *, PyObject *, int, PyObject *, PyObject *);
PyObject *, PyObject *, int, PyObject *, PyObject *,
PyObject *, PyObject *);
/* same as struct above */

/* Creates a new empty code object with the specified source location. */
Expand All @@ -166,6 +173,15 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno);
use PyFrame_GetLineNumber() instead. */
PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int);

/* NOTE(review): presumably maps a bytecode index (the int argument) to its
   source location and stores the start/end line and start/end column through
   the four int* arguments. The order of those out-parameters and the meaning
   of the int result are not visible in this header -- confirm against the
   definition before relying on them. */
PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, int *);

/* Return the ending source code line number from a bytecode index. */
PyAPI_FUNC(int) _PyCode_Addr2EndLine(PyCodeObject *, int);
/* Return the starting source code column offset from a bytecode index. */
PyAPI_FUNC(int) _PyCode_Addr2Offset(PyCodeObject *, int);
/* Return the ending source code column offset from a bytecode index. */
PyAPI_FUNC(int) _PyCode_Addr2EndOffset(PyCodeObject *, int);

/* for internal use only */
struct _opaque {
int computed_line;
Expand Down Expand Up @@ -203,8 +219,9 @@ PyAPI_FUNC(int) _PyCode_GetExtra(PyObject *code, Py_ssize_t index,
PyAPI_FUNC(int) _PyCode_SetExtra(PyObject *code, Py_ssize_t index,
void *extra);

/** API for initializing the line number table. */
/** API for initializing the line number tables. */
int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);
int _PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds);

/** Out of process API for initializing the line number table. */
void PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range);
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ struct _PyCodeConstructor {
PyObject *code;
int firstlineno;
PyObject *linetable;
PyObject *endlinetable;
PyObject *columntable;

/* used by the code */
PyObject *consts;
Expand Down
6 changes: 3 additions & 3 deletions Lib/ctypes/test/test_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ class struct_frozen(Structure):
continue
items.append((entry.name.decode("ascii"), entry.size))

expected = [("__hello__", 133),
("__phello__", -133),
("__phello__.spam", 133),
expected = [("__hello__", 159),
("__phello__", -159),
("__phello__.spam", 159),
]
self.assertEqual(items, expected, "PyImport_FrozenModules example "
"in Doc/library/ctypes.rst may be out of date")
Expand Down
3 changes: 2 additions & 1 deletion Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.11a1 3456 (interleave cell args bpo-43693)
# Python 3.11a1 3457 (Change localsplus to a bytes object bpo-43693)
# Python 3.11a1 3458 (imported objects now don't use LOAD_METHOD/CALL_METHOD)
# Python 3.11a1 3459 (PEP 657: add end line numbers and column offsets for instructions)

#
# MAGIC must change whenever the bytecode emitted by the compiler may no
Expand All @@ -370,7 +371,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3458).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3459).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

_PYCACHE = '__pycache__'
Expand Down
83 changes: 83 additions & 0 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@
import sys
import threading
import unittest
import textwrap
import weakref

try:
import ctypes
except ImportError:
Expand Down Expand Up @@ -223,6 +225,8 @@ def func(): pass
co.co_name,
co.co_firstlineno,
co.co_lnotab,
co.co_endlinetable,
co.co_columntable,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars)
Expand Down Expand Up @@ -257,6 +261,8 @@ def func2():
("co_filename", "newfilename"),
("co_name", "newname"),
("co_linetable", code2.co_linetable),
("co_endlinetable", code2.co_endlinetable),
("co_columntable", code2.co_columntable),
):
with self.subTest(attr=attr, value=value):
new_code = code.replace(**{attr: value})
Expand Down Expand Up @@ -293,6 +299,8 @@ def func():
co.co_name,
co.co_firstlineno,
co.co_lnotab,
co.co_endlinetable,
co.co_columntable,
co.co_exceptiontable,
co.co_freevars,
co.co_cellvars,
Expand All @@ -309,6 +317,81 @@ def func():
new_code = code = func.__code__.replace(co_linetable=b'')
self.assertEqual(list(new_code.co_lines()), [])

# Checks that every position tuple yielded by co_positions() is either fully
# populated or entirely empty, and that the instructions whose positions are
# empty are exactly the expected list of artificial instructions.
def test_co_positions_artificial_instructions(self):
import dis

namespace = {}
exec(textwrap.dedent("""\
try:
1/0
except Exception as e:
exc = e
"""), namespace)

# Recover the code object that executed the snippet from the traceback of
# the exception the snippet stored in the namespace.
exc = namespace['exc']
traceback = exc.__traceback__
code = traceback.tb_frame.f_code

artificial_instructions = []
# strict=True makes zip() raise if the instruction stream and the position
# stream differ in length.
for instr, positions in zip(
dis.get_instructions(code),
code.co_positions(),
strict=True
):
# If any of the positions is None, then all have to
# be None as well for the case above. There are still
# some places in the compiler, where the artificial instructions
# get assigned the first_lineno but they don't have other positions.
# There is no easy way of inferring them at that stage, so for now
# we don't support it.
self.assertTrue(all(positions) or not any(positions))

if not any(positions):
artificial_instructions.append(instr)

# Compare (opname, argval) pairs, in order, against the expected list.
self.assertEqual(
[
(instruction.opname, instruction.argval)
for instruction in artificial_instructions
],
[
("PUSH_EXC_INFO", None),
("LOAD_CONST", None), # artificial 'None'
("STORE_NAME", "e"), # XX: we know the location for this
("DELETE_NAME", "e"),
("RERAISE", 1),
("POP_EXCEPT_AND_RERAISE", None)
]
)

# co_positions behavior when info is missing.

# With co_linetable replaced by empty bytes, every position is expected to
# report no start line, while the end line is still expected to be the line
# after co_firstlineno (the single statement in func's body).
def test_co_positions_empty_linetable(self):
def func():
x = 1
new_code = func.__code__.replace(co_linetable=b'')
# column and end_column are unpacked but not checked by this test.
for line, end_line, column, end_column in new_code.co_positions():
self.assertIsNone(line)
self.assertEqual(end_line, new_code.co_firstlineno + 1)

# With co_endlinetable replaced by empty bytes, every position is expected to
# keep its start line (the line after co_firstlineno) and report no end line.
def test_co_positions_empty_endlinetable(self):
def func():
x = 1
new_code = func.__code__.replace(co_endlinetable=b'')
# column and end_column are unpacked but not checked by this test.
for line, end_line, column, end_column in new_code.co_positions():
self.assertEqual(line, new_code.co_firstlineno + 1)
self.assertIsNone(end_line)

# With co_columntable replaced by empty bytes, every position is expected to
# keep both line numbers (the line after co_firstlineno) and report no start
# or end column.
def test_co_positions_empty_columntable(self):
def func():
x = 1
new_code = func.__code__.replace(co_columntable=b'')
for line, end_line, column, end_column in new_code.co_positions():
self.assertEqual(line, new_code.co_firstlineno + 1)
self.assertEqual(end_line, new_code.co_firstlineno + 1)
self.assertIsNone(column)
self.assertIsNone(end_column)


def isinterned(s):
    """Return True when `s` is the very object stored in the intern table."""
    # Pad and slice to build an equal-valued string that is generally a
    # separate object from `s`; sys.intern() then returns whichever equal
    # string is (or has just become) the interned one.
    equal_copy = ('_' + s + '_')[1:-1]
    canonical = sys.intern(equal_copy)
    return s is canonical
Expand Down
114 changes: 114 additions & 0 deletions Lib/test/test_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import unittest
import sys
import ast
import _ast
import tempfile
import types
Expand Down Expand Up @@ -985,6 +986,119 @@ def if_else_break():
self.assertNotEqual(instr.arg, (line + 1)*INSTR_SIZE)


class TestSourcePositions(unittest.TestCase):
# Ensure that compiled code snippets have correct line and column numbers
# in `co_positions()`.

# Compiles `snippet` both to a code object and to an AST, checks every
# non-None value from co_positions() against the AST, and returns
# (code, ast_tree).
def check_positions_against_ast(self, snippet):
# Basic check that makes sure each line and column is at least present
# in one of the AST nodes of the source code.
code = compile(snippet, 'test_compile.py', 'exec')
ast_tree = compile(snippet, 'test_compile.py', 'exec', _ast.PyCF_ONLY_AST)
self.assertTrue(type(ast_tree) == _ast.Module)

# Use an AST visitor that notes all the offsets.
lines, end_lines, columns, end_columns = set(), set(), set(), set()
class SourceOffsetVisitor(ast.NodeVisitor):
def generic_visit(self, node):
super().generic_visit(node)
# Only expression and statement nodes contribute positions.
if not isinstance(node, ast.expr) and not isinstance(node, ast.stmt):
return
# The AST column offsets are shifted by one before being recorded; the
# columns from co_positions() are checked against these shifted sets below.
lines.add(node.lineno)
end_lines.add(node.end_lineno)
columns.add(node.col_offset + 1)
end_columns.add(node.end_col_offset + 1)

SourceOffsetVisitor().visit(ast_tree)

# Check against the positions in the code object.
for (line, end_line, col, end_col) in code.co_positions():
# If the offset is not None (indicating missing data), ensure that
# it was part of one of the AST nodes.
if line is not None:
self.assertIn(line, lines)
if end_line is not None:
self.assertIn(end_line, end_lines)
if col is not None:
self.assertIn(col, columns)
if end_col is not None:
self.assertIn(end_col, end_columns)

# Hand both objects back so callers can make opcode-specific checks.
return code, ast_tree

# Asserts that the first instruction named `opcode` in `code` has exactly
# the given (line, end_line, column, end_column); fails the test if no
# instruction with that name is found.
# NOTE(review): `dis` is not imported in the visible part of this file --
# presumably it is imported at module top; confirm.
def assertOpcodeSourcePositionIs(self, code, opcode,
line, end_line, column, end_column):

# NOTE(review): zip() is not strict here, so iteration stops silently at the
# shorter of the two streams.
for instr, position in zip(dis.Bytecode(code), code.co_positions()):
if instr.opname == opcode:
self.assertEqual(position[0], line)
self.assertEqual(position[1], end_line)
self.assertEqual(position[2], column)
self.assertEqual(position[3], end_column)
return

self.fail(f"Opcode {opcode} not found in code")

def test_simple_assignment(self):
snippet = "x = 1"
self.check_positions_against_ast(snippet)

def test_compiles_to_extended_op_arg(self):
# Make sure we still have valid positions when the code compiles to an
# EXTENDED_ARG by performing a loop which needs a JUMP_ABSOLUTE after
# a bunch of opcodes.
snippet = "x = x\n" * 10_000
snippet += ("while x != 0:\n"
" x -= 1\n"
"while x != 0:\n"
" x += 1\n"
)

compiled_code, _ = self.check_positions_against_ast(snippet)

# The two loop bodies sit on lines 10_002 and 10_004 of the snippet.
self.assertOpcodeSourcePositionIs(compiled_code, 'INPLACE_SUBTRACT',
line=10_000 + 2, end_line=10_000 + 2,
column=3, end_column=9)
self.assertOpcodeSourcePositionIs(compiled_code, 'INPLACE_ADD',
line=10_000 + 4, end_line=10_000 + 4,
column=3, end_column=10)

# A call spread over three source lines is expected to report line 1 as its
# start line and line 3 as its end line.
def test_multiline_expression(self):
snippet = """\
f(
1, 2, 3, 4
)
"""
compiled_code, _ = self.check_positions_against_ast(snippet)
self.assertOpcodeSourcePositionIs(compiled_code, 'CALL_FUNCTION',
line=1, end_line=3, column=1, end_column=2)

def test_very_long_line_end_offset(self):
# Make sure we get None when the column offset is too large to
# store in a byte.
long_string = "a" * 1000
snippet = f"g('{long_string}')"

compiled_code, _ = self.check_positions_against_ast(snippet)
self.assertOpcodeSourcePositionIs(compiled_code, 'CALL_FUNCTION',
line=1, end_line=1, column=None, end_column=None)

def test_complex_single_line_expression(self):
snippet = "a - b @ (c * x['key'] + 23)"

compiled_code, _ = self.check_positions_against_ast(snippet)
# The expected columns are the 1-based start/end of each operator's whole
# sub-expression within `snippet`.
self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_SUBSCR',
line=1, end_line=1, column=14, end_column=22)
self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_MULTIPLY',
line=1, end_line=1, column=10, end_column=22)
self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_ADD',
line=1, end_line=1, column=10, end_column=27)
self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_MATRIX_MULTIPLY',
line=1, end_line=1, column=5, end_column=28)
self.assertOpcodeSourcePositionIs(compiled_code, 'BINARY_SUBTRACT',
line=1, end_line=1, column=1, end_column=28)


class TestExpressionStackSize(unittest.TestCase):
# These tests check that the computed stack size for a code object
# stays within reasonable bounds (see issue #21523 for an example
Expand Down
Loading