Skip to content

Commit 8ec15b3

Browse files
committed
Simplify and speed up the interpreter's handling of f-strings. Split the FORMAT_VALUE opcode into CONVERT_VALUE, FORMAT_SIMPLE and FORMAT_WITH_SPEC.
The compiler can then emit the optimal opcode sequence for each formatted expression.
1 parent c2b0b12 commit 8ec15b3

File tree

9 files changed

+426
-422
lines changed

9 files changed

+426
-422
lines changed

Include/opcode.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ extern "C" {
3030
#define BINARY_TRUE_DIVIDE 27
3131
#define INPLACE_FLOOR_DIVIDE 28
3232
#define INPLACE_TRUE_DIVIDE 29
33+
#define FORMAT_SIMPLE 40
34+
#define FORMAT_WITH_SPEC 41
3335
#define GET_AITER 50
3436
#define GET_ANEXT 51
3537
#define BEFORE_ASYNC_WITH 52
@@ -119,14 +121,14 @@ extern "C" {
119121
#define BUILD_TUPLE_UNPACK 152
120122
#define BUILD_SET_UNPACK 153
121123
#define SETUP_ASYNC_WITH 154
122-
#define FORMAT_VALUE 155
123124
#define BUILD_CONST_KEY_MAP 156
124125
#define BUILD_STRING 157
125126
#define BUILD_TUPLE_UNPACK_WITH_CALL 158
126127
#define LOAD_METHOD 160
127128
#define CALL_METHOD 161
128129
#define CALL_FINALLY 162
129130
#define POP_FINALLY 163
131+
#define CONVERT_VALUE 164
130132

131133
/* EXCEPT_HANDLER is a special, implicit block type which is created when
132134
entering an except handler. It is not an opcode but we define it here

Lib/dis.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
1717
classmethod, staticmethod, type)
1818

19-
FORMAT_VALUE = opmap['FORMAT_VALUE']
19+
CONVERT_VALUE = opmap['CONVERT_VALUE']
2020
FORMAT_VALUE_CONVERTERS = (
2121
(None, ''),
2222
(str, 'str'),
@@ -347,13 +347,9 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
347347
argrepr = argval
348348
elif op in hasfree:
349349
argval, argrepr = _get_name_info(arg, cells)
350-
elif op == FORMAT_VALUE:
351-
argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
352-
argval = (argval, bool(arg & 0x4))
353-
if argval[1]:
354-
if argrepr:
355-
argrepr += ', '
356-
argrepr += 'with format'
350+
elif op == CONVERT_VALUE:
351+
argval = (None, str, repr, ascii)[arg]
352+
argrepr = ('', 'str', 'repr', 'ascii')[arg]
357353
elif op == MAKE_FUNCTION:
358354
argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
359355
if arg & (1<<i))

Lib/opcode.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ def jabs_op(name, op):
8484
def_op('INPLACE_FLOOR_DIVIDE', 28)
8585
def_op('INPLACE_TRUE_DIVIDE', 29)
8686

87+
def_op('FORMAT_SIMPLE', 40)
88+
def_op('FORMAT_WITH_SPEC', 41)
89+
8790
def_op('GET_AITER', 50)
8891
def_op('GET_ANEXT', 51)
8992
def_op('BEFORE_ASYNC_WITH', 52)
@@ -203,7 +206,7 @@ def jabs_op(name, op):
203206

204207
jrel_op('SETUP_ASYNC_WITH', 154)
205208

206-
def_op('FORMAT_VALUE', 155)
209+
207210
def_op('BUILD_CONST_KEY_MAP', 156)
208211
def_op('BUILD_STRING', 157)
209212
def_op('BUILD_TUPLE_UNPACK_WITH_CALL', 158)
@@ -213,4 +216,6 @@ def jabs_op(name, op):
213216
jrel_op('CALL_FINALLY', 162)
214217
def_op('POP_FINALLY', 163)
215218

219+
def_op('CONVERT_VALUE', 164)
220+
216221
del def_op, name_op, jrel_op, jabs_op

Lib/test/test_dis.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -311,20 +311,22 @@ def _fstring(a, b, c, d):
311311

312312
dis_fstring = """\
313313
%3d 0 LOAD_FAST 0 (a)
314-
2 FORMAT_VALUE 0
314+
2 FORMAT_SIMPLE
315315
4 LOAD_CONST 1 (' ')
316316
6 LOAD_FAST 1 (b)
317317
8 LOAD_CONST 2 ('4')
318-
10 FORMAT_VALUE 4 (with format)
318+
10 FORMAT_WITH_SPEC
319319
12 LOAD_CONST 1 (' ')
320320
14 LOAD_FAST 2 (c)
321-
16 FORMAT_VALUE 2 (repr)
322-
18 LOAD_CONST 1 (' ')
323-
20 LOAD_FAST 3 (d)
324-
22 LOAD_CONST 2 ('4')
325-
24 FORMAT_VALUE 6 (repr, with format)
326-
26 BUILD_STRING 7
327-
28 RETURN_VALUE
321+
16 CONVERT_VALUE 2 (repr)
322+
18 FORMAT_SIMPLE
323+
20 LOAD_CONST 1 (' ')
324+
22 LOAD_FAST 3 (d)
325+
24 CONVERT_VALUE 2 (repr)
326+
26 LOAD_CONST 2 ('4')
327+
28 FORMAT_WITH_SPEC
328+
30 BUILD_STRING 7
329+
32 RETURN_VALUE
328330
""" % (_fstring.__code__.co_firstlineno + 1,)
329331

330332
def _g(x):

Python/ceval.c

+41-46
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@ static PyObject * special_lookup(PyObject *, _Py_Identifier *);
7070
static int check_args_iterable(PyObject *func, PyObject *vararg);
7171
static void format_kwargs_mapping_error(PyObject *func, PyObject *kwargs);
7272

73+
typedef PyObject *(*convertion_func_ptr)(PyObject *);
74+
75+
static const convertion_func_ptr CONVERTION_FUNCTIONS[4] = {
76+
[FVC_STR] = PyObject_Str,
77+
[FVC_REPR] = PyObject_Repr,
78+
[FVC_ASCII] = PyObject_ASCII
79+
};
80+
7381
#define NAME_ERROR_MSG \
7482
"name '%.200s' is not defined"
7583
#define UNBOUNDLOCAL_ERROR_MSG \
@@ -3284,60 +3292,47 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
32843292
DISPATCH();
32853293
}
32863294

3287-
TARGET(FORMAT_VALUE) {
3288-
/* Handles f-string value formatting. */
3295+
TARGET(CONVERT_VALUE) {
32893296
PyObject *result;
3290-
PyObject *fmt_spec;
3291-
PyObject *value;
3292-
PyObject *(*conv_fn)(PyObject *);
3293-
int which_conversion = oparg & FVC_MASK;
3294-
int have_fmt_spec = (oparg & FVS_MASK) == FVS_HAVE_SPEC;
3295-
3296-
fmt_spec = have_fmt_spec ? POP() : NULL;
3297-
value = POP();
3298-
3299-
/* See if any conversion is specified. */
3300-
switch (which_conversion) {
3301-
case FVC_STR: conv_fn = PyObject_Str; break;
3302-
case FVC_REPR: conv_fn = PyObject_Repr; break;
3303-
case FVC_ASCII: conv_fn = PyObject_ASCII; break;
3304-
3305-
/* Must be 0 (meaning no conversion), since only four
3306-
values are allowed by (oparg & FVC_MASK). */
3307-
default: conv_fn = NULL; break;
3308-
}
3309-
3310-
/* If there's a conversion function, call it and replace
3311-
value with that result. Otherwise, just use value,
3312-
without conversion. */
3313-
if (conv_fn != NULL) {
3314-
result = conv_fn(value);
3315-
Py_DECREF(value);
3316-
if (result == NULL) {
3317-
Py_XDECREF(fmt_spec);
3318-
goto error;
3319-
}
3320-
value = result;
3297+
PyObject *value = POP();
3298+
convertion_func_ptr conv_fn;
3299+
assert(oparg >= FVC_STR && oparg <= FVC_ASCII);
3300+
conv_fn = CONVERTION_FUNCTIONS[oparg];
3301+
result = conv_fn(value);
3302+
Py_DECREF(value);
3303+
if (result == NULL) {
3304+
goto error;
33213305
}
3306+
PUSH(result);
3307+
DISPATCH();
3308+
}
33223309

3323-
/* If value is a unicode object, and there's no fmt_spec,
3324-
then we know the result of format(value) is value
3325-
itself. In that case, skip calling format(). I plan to
3326-
move this optimization in to PyObject_Format()
3327-
itself. */
3328-
if (PyUnicode_CheckExact(value) && fmt_spec == NULL) {
3329-
/* Do nothing, just transfer ownership to result. */
3330-
result = value;
3331-
} else {
3332-
/* Actually call format(). */
3333-
result = PyObject_Format(value, fmt_spec);
3334-
Py_DECREF(value);
3335-
Py_XDECREF(fmt_spec);
3310+
TARGET(FORMAT_SIMPLE) {
3311+
PyObject *value = TOP();
3312+
/* If value is a unicode object, then we know the result
3313+
* of format(value) is value itself. */
3314+
if (!PyUnicode_CheckExact(value)) {
3315+
PyObject *result = PyObject_Format(value, NULL);
33363316
if (result == NULL) {
33373317
goto error;
33383318
}
3319+
SET_TOP(result);
3320+
Py_DECREF(value);
33393321
}
3322+
DISPATCH();
3323+
}
33403324

3325+
TARGET(FORMAT_WITH_SPEC) {
3326+
PyObject *fmt_spec = POP();
3327+
PyObject *value = POP();
3328+
PyObject *result;
3329+
/* Call format(). */
3330+
result = PyObject_Format(value, fmt_spec);
3331+
Py_DECREF(value);
3332+
Py_DECREF(fmt_spec);
3333+
if (result == NULL) {
3334+
goto error;
3335+
}
33413336
PUSH(result);
33423337
DISPATCH();
33433338
}

Python/compile.c

+21-19
Original file line numberDiff line numberDiff line change
@@ -1111,10 +1111,12 @@ stack_effect(int opcode, int oparg, int jump)
11111111
return 1;
11121112
case GET_YIELD_FROM_ITER:
11131113
return 0;
1114-
case FORMAT_VALUE:
1115-
/* If there's a fmt_spec on the stack, we go from 2->1,
1116-
else 1->1. */
1117-
return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
1114+
case CONVERT_VALUE:
1115+
return 0;
1116+
case FORMAT_WITH_SPEC:
1117+
return -1;
1118+
case FORMAT_SIMPLE:
1119+
return 0;
11181120
case LOAD_METHOD:
11191121
return 1;
11201122
default:
@@ -3711,25 +3713,25 @@ compiler_formatted_value(struct compiler *c, expr_ty e)
37113713

37123714
/* Evaluate the expression to be formatted. */
37133715
VISIT(c, expr, e->v.FormattedValue.value);
3714-
3715-
switch (e->v.FormattedValue.conversion) {
3716-
case 's': oparg = FVC_STR; break;
3717-
case 'r': oparg = FVC_REPR; break;
3718-
case 'a': oparg = FVC_ASCII; break;
3719-
case -1: oparg = FVC_NONE; break;
3720-
default:
3721-
PyErr_SetString(PyExc_SystemError,
3722-
"Unrecognized conversion character");
3723-
return 0;
3716+
if (e->v.FormattedValue.conversion != -1) {
3717+
switch (e->v.FormattedValue.conversion) {
3718+
case 's': oparg = FVC_STR; break;
3719+
case 'r': oparg = FVC_REPR; break;
3720+
case 'a': oparg = FVC_ASCII; break;
3721+
default:
3722+
PyErr_SetString(PyExc_SystemError,
3723+
"Unrecognized conversion character");
3724+
return 0;
3725+
}
3726+
ADDOP_I(c, CONVERT_VALUE, oparg);
37243727
}
37253728
if (e->v.FormattedValue.format_spec) {
3726-
/* Evaluate the format spec, and update our opcode arg. */
3729+
/* Evaluate the format spec, and emit format opcode. */
37273730
VISIT(c, expr, e->v.FormattedValue.format_spec);
3728-
oparg |= FVS_HAVE_SPEC;
3731+
ADDOP(c, FORMAT_WITH_SPEC);
3732+
} else {
3733+
ADDOP(c, FORMAT_SIMPLE);
37293734
}
3730-
3731-
/* And push our opcode and oparg */
3732-
ADDOP_I(c, FORMAT_VALUE, oparg);
37333735
return 1;
37343736
}
37353737

0 commit comments

Comments
 (0)