python · gpshead · Mar 30, 2020 · Mar 30, 2020 · Mar 30, 2020 · Mar 30, 2020
diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h
@@ -5,7 +5,7 @@
 typedef struct {
     PyObject_VAR_HEAD
     Py_hash_t ob_shash;
-    char ob_sval[1];
+    char ob_sval[];
 
     /* Invariants:
      *     ob_sval contains space for 'ob_size+1' elements.

diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
@@ -1197,7 +1197,7 @@ class C(object): pass
         # EncodingMap
         import codecs, encodings.iso8859_3
         x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
-        check(x, size('32B2iB'))
+        check(x, size('32B2i'))
         # enumerate
         check(enumerate([]), size('n3P'))
         # reverse

diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst b/Misc/NEWS.d/next/Core and Builtins/2020-03-30-14-06-40.bpo-40120.6ptcf4.rst
@@ -0,0 +1,4 @@
+Fixed internal structure definitions such as PyBytesObject and unicode's
+encoding_map so that access to their trailing unbounded character array
+no longer relies on C undefined behavior; they now use the C99-approved
+flexible array member syntax.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
@@ -28,6 +28,9 @@ _Py_IDENTIFIER(__bytes__);
 
    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
    3 bytes per string allocation on a typical system.
+
+   The + 1 accounts for the trailing \0 byte that we include as a safety
+   measure for code that treats the underlying char * as a C string.
 */
 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
 

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -8208,14 +8208,14 @@ struct encoding_map {
     PyObject_HEAD
     unsigned char level1[32];
     int count2, count3;
-    unsigned char level23[1];
+    unsigned char level23[];
 };
 
 static PyObject*
 encoding_map_size(PyObject *obj, PyObject* args)
 {
     struct encoding_map *map = (struct encoding_map*)obj;
-    return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 +
+    return PyLong_FromLong(sizeof(*map) + 16*map->count2 +
                            128*map->count3);
 }
 
@@ -8347,7 +8347,7 @@ PyUnicode_BuildEncodingMap(PyObject* string)
 
     /* Create a three-level trie */
     result = PyObject_MALLOC(sizeof(struct encoding_map) +
-                             16*count2 + 128*count3 - 1);
+                             16*count2 + 128*count3);
     if (!result)
         return PyErr_NoMemory();
     PyObject_Init(result, &EncodingMapType);