Skip to content

Commit 6dab8c9

Browse files
GH-96458: Statically initialize utf8 representation of static strings (#96481)
1 parent 16c6759 commit 6dab8c9

File tree

6 files changed

+139
-165
lines changed

6 files changed

+139
-165
lines changed

Include/internal/pycore_runtime_init.h

+3 -1
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,12 @@ extern "C" {
113113
._ ## NAME = _PyASCIIObject_INIT(LITERAL)
114114
#define INIT_ID(NAME) \
115115
._ ## NAME = _PyASCIIObject_INIT(#NAME)
116-
#define _PyUnicode_LATIN1_INIT(LITERAL) \
116+
#define _PyUnicode_LATIN1_INIT(LITERAL, UTF8) \
117117
{ \
118118
._latin1 = { \
119119
._base = _PyUnicode_ASCII_BASE_INIT((LITERAL), 0), \
120+
.utf8 = (UTF8), \
121+
.utf8_length = sizeof(UTF8) - 1, \
120122
}, \
121123
._data = (LITERAL), \
122124
}

Include/internal/pycore_runtime_init_generated.h

+128 -128
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject.h

-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
1919
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
2020
extern void _PyUnicode_Fini(PyInterpreterState *);
2121
extern void _PyUnicode_FiniTypes(PyInterpreterState *);
22-
extern void _PyStaticUnicode_Dealloc(PyObject *);
2322

2423
extern PyTypeObject _PyUnicodeASCIIIter_Type;
2524

Objects/unicodeobject.c

-33
Original file line number | Diff line number | Diff line change
@@ -15184,23 +15184,6 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
1518415184
}
1518515185

1518615186

15187-
static void unicode_static_dealloc(PyObject *op)
15188-
{
15189-
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
15190-
15191-
assert(ascii->state.compact);
15192-
15193-
if (!ascii->state.ascii) {
15194-
PyCompactUnicodeObject* compact = (PyCompactUnicodeObject*)op;
15195-
if (compact->utf8) {
15196-
PyObject_Free(compact->utf8);
15197-
compact->utf8 = NULL;
15198-
compact->utf8_length = 0;
15199-
}
15200-
}
15201-
}
15202-
15203-
1520415187
void
1520515188
_PyUnicode_Fini(PyInterpreterState *interp)
1520615189
{
@@ -15217,24 +15200,8 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1521715200
_PyUnicode_FiniEncodings(&state->fs_codec);
1521815201

1521915202
unicode_clear_identifiers(state);
15220-
15221-
// Clear the single character singletons
15222-
for (int i = 0; i < 128; i++) {
15223-
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
15224-
}
15225-
for (int i = 0; i < 128; i++) {
15226-
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
15227-
}
1522815203
}
1522915204

15230-
15231-
void
15232-
_PyStaticUnicode_Dealloc(PyObject *op)
15233-
{
15234-
unicode_static_dealloc(op);
15235-
}
15236-
15237-
1523815205
/* A _string module, to export formatter_parser and formatter_field_name_split
1523915206
to the string.Formatter class implemented in Python. */
1524015207

Tools/scripts/deepfreeze.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -195,7 +195,6 @@ def generate_unicode(self, name: str, s: str) -> str:
195195
else:
196196
self.write("PyCompactUnicodeObject _compact;")
197197
self.write(f"{datatype} _data[{len(s)+1}];")
198-
self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
199198
with self.block(f"{name} =", ";"):
200199
if ascii:
201200
with self.block("._ascii =", ","):
@@ -218,6 +217,9 @@ def generate_unicode(self, name: str, s: str) -> str:
218217
self.write(f".kind = {kind},")
219218
self.write(".compact = 1,")
220219
self.write(".ascii = 0,")
220+
utf8 = s.encode('utf-8')
221+
self.write(f'.utf8 = {make_string_literal(utf8)},')
222+
self.write(f'.utf8_length = {len(utf8)},')
221223
with self.block(f"._data =", ","):
222224
for i in range(0, len(s), 16):
223225
data = s[i:i+16]

Tools/scripts/generate_global_objects.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,11 @@ def generate_runtime_init(identifiers, strings):
287287
immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
288288
with printer.block('.latin1 =', ','):
289289
for i in range(128, 256):
290-
printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}"),')
290+
utf8 = ['"']
291+
for c in chr(i).encode('utf-8'):
292+
utf8.append(f"\\x{c:02x}")
293+
utf8.append('"')
294+
printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
291295
immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
292296
printer.write('')
293297
with printer.block('.tuple_empty =', ','):

0 commit comments

Comments
 (0)