Skip to content

bpo-1635741: Release Unicode interned strings at exit #21269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_pylifecycle.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ extern void _PyGC_Fini(PyThreadState *tstate);
extern void _PyType_Fini(void);
extern void _Py_HashRandomization_Fini(void);
extern void _PyUnicode_Fini(PyThreadState *tstate);
extern void _PyUnicode_ClearInterned(PyThreadState *tstate);
extern void _PyLong_Fini(PyThreadState *tstate);
extern void _PyFaulthandler_Fini(void);
extern void _PyHash_Fini(void);
Expand Down
60 changes: 28 additions & 32 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h>
#endif

/* Uncomment to display statistics on interned strings at exit when
using Valgrind or Insecure++. */
/* Uncomment to display statistics on interned strings at exit
in _PyUnicode_ClearInterned(). */
/* #define INTERNED_STATS 1 */


Expand Down Expand Up @@ -15681,6 +15681,11 @@ PyUnicode_InternInPlace(PyObject **p)
}

#ifdef INTERNED_STRINGS
if (PyUnicode_READY(s) == -1) {
PyErr_Clear();
return;
}

if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
Expand Down Expand Up @@ -15733,23 +15738,29 @@ PyUnicode_InternFromString(const char *cp)
}


#if defined(WITH_VALGRIND) || defined(__INSURE__)
static void
unicode_release_interned(void)
void
_PyUnicode_ClearInterned(PyThreadState *tstate)
{
if (interned == NULL || !PyDict_Check(interned)) {
if (!_Py_IsMainInterpreter(tstate)) {
// interned dict is shared by all interpreters
return;
}

if (interned == NULL) {
return;
}
assert(PyDict_CheckExact(interned));

PyObject *keys = PyDict_Keys(interned);
if (keys == NULL || !PyList_Check(keys)) {
if (keys == NULL) {
PyErr_Clear();
return;
}
assert(PyList_CheckExact(keys));

/* Since unicode_release_interned() is intended to help a leak
detector, interned unicode strings are not forcibly deallocated;
rather, we give them their stolen references back, and then clear
and DECREF the interned dict. */
/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */

Py_ssize_t n = PyList_GET_SIZE(keys);
#ifdef INTERNED_STATS
Expand All @@ -15759,9 +15770,8 @@ unicode_release_interned(void)
#endif
for (Py_ssize_t i = 0; i < n; i++) {
PyObject *s = PyList_GET_ITEM(keys, i);
if (PyUnicode_READY(s) == -1) {
Py_UNREACHABLE();
}
assert(PyUnicode_IS_READY(s));

switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
Expand All @@ -15788,10 +15798,10 @@ unicode_release_interned(void)
mortal_size, immortal_size);
#endif
Py_DECREF(keys);

PyDict_Clear(interned);
Py_CLEAR(interned);
}
#endif


/********************* Unicode Iterator **************************/
Expand Down Expand Up @@ -16160,31 +16170,17 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
void
_PyUnicode_Fini(PyThreadState *tstate)
{
struct _Py_unicode_state *state = &tstate->interp->unicode;
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()

int is_main_interp = _Py_IsMainInterpreter(tstate);
if (is_main_interp) {
#if defined(WITH_VALGRIND) || defined(__INSURE__)
/* Insure++ is a memory analysis tool that aids in discovering
* memory leaks and other memory problems. On Python exit, the
* interned string dictionaries are flagged as being in use at exit
* (which it is). Under normal circumstances, this is fine because
* the memory will be automatically reclaimed by the system. Under
* memory debugging, it's a huge source of useless noise, so we
* trade off slower shutdown for less distraction in the memory
* reports. -baw
*/
unicode_release_interned();
#endif /* __INSURE__ */
}
struct _Py_unicode_state *state = &tstate->interp->unicode;

Py_CLEAR(state->empty_string);

for (Py_ssize_t i = 0; i < 256; i++) {
Py_CLEAR(state->latin1[i]);
}

if (is_main_interp) {
if (_Py_IsMainInterpreter(tstate)) {
unicode_clear_static_strings();
}

Expand Down
1 change: 1 addition & 0 deletions Python/pylifecycle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1263,6 +1263,7 @@ finalize_interp_types(PyThreadState *tstate)
_PyFrame_Fini(tstate);
_PyAsyncGen_Fini(tstate);
_PyContext_Fini(tstate);
_PyUnicode_ClearInterned(tstate);

_PyDict_Fini(tstate);
_PyList_Fini(tstate);
Expand Down