Skip to content

Commit c14d7e4

Browse files
authored
bpo-47164: Add _PyASCIIObject_CAST() macro (GH-32191)
Add macros to cast objects to PyASCIIObject*, PyCompactUnicodeObject* and PyUnicodeObject*: _PyASCIIObject_CAST(), _PyCompactUnicodeObject_CAST() and _PyUnicodeObject_CAST(). Using these new macros makes the code more readable, and each macro checks its argument with assert(PyUnicode_Check(op)). Remove the redundant assert(PyUnicode_Check(op)) from macros that use these new CAST macros directly or indirectly. Replace existing casts with these macros.
1 parent db4dada commit c14d7e4

File tree

8 files changed

+76
-78
lines changed

8 files changed

+76
-78
lines changed

Include/cpython/unicodeobject.h

+34-33
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,15 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
234234
PyObject *op,
235235
int check_content);
236236

237+
238+
#define _PyASCIIObject_CAST(op) \
239+
(assert(PyUnicode_Check(op)), (PyASCIIObject*)(op))
240+
#define _PyCompactUnicodeObject_CAST(op) \
241+
(assert(PyUnicode_Check(op)), (PyCompactUnicodeObject*)(op))
242+
#define _PyUnicodeObject_CAST(op) \
243+
(assert(PyUnicode_Check(op)), (PyUnicodeObject*)(op))
244+
245+
237246
/* Fast access macros */
238247

239248
/* Returns the deprecated Py_UNICODE representation's size in code units
@@ -243,11 +252,10 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
243252

244253
/* Py_DEPRECATED(3.3) */
245254
#define PyUnicode_GET_SIZE(op) \
246-
(assert(PyUnicode_Check(op)), \
247-
(((PyASCIIObject *)(op))->wstr) ? \
255+
(_PyASCIIObject_CAST(op)->wstr ? \
248256
PyUnicode_WSTR_LENGTH(op) : \
249257
((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
250-
assert(((PyASCIIObject *)(op))->wstr), \
258+
assert(_PyASCIIObject_CAST(op)->wstr), \
251259
PyUnicode_WSTR_LENGTH(op)))
252260

253261
/* Py_DEPRECATED(3.3) */
@@ -261,9 +269,8 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
261269

262270
/* Py_DEPRECATED(3.3) */
263271
#define PyUnicode_AS_UNICODE(op) \
264-
(assert(PyUnicode_Check(op)), \
265-
(((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
266-
PyUnicode_AsUnicode(_PyObject_CAST(op)))
272+
(_PyASCIIObject_CAST(op)->wstr ? _PyASCIIObject_CAST(op)->wstr : \
273+
PyUnicode_AsUnicode(_PyObject_CAST(op)))
267274

268275
/* Py_DEPRECATED(3.3) */
269276
#define PyUnicode_AS_DATA(op) \
@@ -281,25 +288,24 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
281288

282289
/* Use only if you know it's a string */
283290
#define PyUnicode_CHECK_INTERNED(op) \
284-
(((PyASCIIObject *)(op))->state.interned)
291+
(_PyASCIIObject_CAST(op)->state.interned)
285292

286293
/* Return true if the string contains only ASCII characters, or 0 if not. The
287294
string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
288295
ready. */
289296
#define PyUnicode_IS_ASCII(op) \
290-
(assert(PyUnicode_Check(op)), \
291-
assert(PyUnicode_IS_READY(op)), \
292-
((PyASCIIObject*)op)->state.ascii)
297+
(assert(PyUnicode_IS_READY(op)), \
298+
_PyASCIIObject_CAST(op)->state.ascii)
293299

294300
/* Return true if the string is compact or 0 if not.
295301
No type checks or Ready calls are performed. */
296302
#define PyUnicode_IS_COMPACT(op) \
297-
(((PyASCIIObject*)(op))->state.compact)
303+
(_PyASCIIObject_CAST(op)->state.compact)
298304

299305
/* Return true if the string is a compact ASCII string (use PyASCIIObject
300306
structure), or 0 if not. No type checks or Ready calls are performed. */
301307
#define PyUnicode_IS_COMPACT_ASCII(op) \
302-
(((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
308+
(_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op))
303309

304310
enum PyUnicode_Kind {
305311
/* String contains only wstr byte characters. This is only possible
@@ -323,23 +329,21 @@ enum PyUnicode_Kind {
323329

324330
/* Return one of the PyUnicode_*_KIND values defined above. */
325331
#define PyUnicode_KIND(op) \
326-
(assert(PyUnicode_Check(op)), \
327-
assert(PyUnicode_IS_READY(op)), \
328-
((PyASCIIObject *)(op))->state.kind)
332+
(assert(PyUnicode_IS_READY(op)), \
333+
_PyASCIIObject_CAST(op)->state.kind)
329334

330335
/* Return a void pointer to the raw unicode buffer. */
331336
#define _PyUnicode_COMPACT_DATA(op) \
332-
(PyUnicode_IS_ASCII(op) ? \
333-
((void*)((PyASCIIObject*)(op) + 1)) : \
334-
((void*)((PyCompactUnicodeObject*)(op) + 1)))
337+
(PyUnicode_IS_ASCII(op) ? \
338+
((void*)(_PyASCIIObject_CAST(op) + 1)) : \
339+
((void*)(_PyCompactUnicodeObject_CAST(op) + 1)))
335340

336341
#define _PyUnicode_NONCOMPACT_DATA(op) \
337-
(assert(((PyUnicodeObject*)(op))->data.any), \
338-
((((PyUnicodeObject *)(op))->data.any)))
342+
(assert(_PyUnicodeObject_CAST(op)->data.any), \
343+
(_PyUnicodeObject_CAST(op)->data.any))
339344

340345
#define PyUnicode_DATA(op) \
341-
(assert(PyUnicode_Check(op)), \
342-
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
346+
(PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
343347
_PyUnicode_NONCOMPACT_DATA(op))
344348

345349
/* In the access macros below, "kind" may be evaluated more than once.
@@ -386,8 +390,7 @@ enum PyUnicode_Kind {
386390
PyUnicode_READ_CHAR, for multiple consecutive reads callers should
387391
cache kind and use PyUnicode_READ instead. */
388392
#define PyUnicode_READ_CHAR(unicode, index) \
389-
(assert(PyUnicode_Check(unicode)), \
390-
assert(PyUnicode_IS_READY(unicode)), \
393+
(assert(PyUnicode_IS_READY(unicode)), \
391394
(Py_UCS4) \
392395
(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
393396
((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
@@ -401,23 +404,21 @@ enum PyUnicode_Kind {
401404
the string has its canonical representation set before calling
402405
this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
403406
#define PyUnicode_GET_LENGTH(op) \
404-
(assert(PyUnicode_Check(op)), \
405-
assert(PyUnicode_IS_READY(op)), \
406-
((PyASCIIObject *)(op))->length)
407+
(assert(PyUnicode_IS_READY(op)), \
408+
_PyASCIIObject_CAST(op)->length)
407409

408410

409411
/* Fast check to determine whether an object is ready. Equivalent to
410-
PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any */
412+
PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */
411413

412-
#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
414+
#define PyUnicode_IS_READY(op) (_PyASCIIObject_CAST(op)->state.ready)
413415

414416
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
415417
case. If the canonical representation is not yet set, it will still call
416418
_PyUnicode_Ready().
417419
Returns 0 on success and -1 on errors. */
418420
#define PyUnicode_READY(op) \
419-
(assert(PyUnicode_Check(op)), \
420-
(PyUnicode_IS_READY(op) ? \
421+
((PyUnicode_IS_READY(op) ? \
421422
0 : _PyUnicode_Ready(_PyObject_CAST(op))))
422423

423424
/* Return a maximum character value which is suitable for creating another
@@ -436,8 +437,8 @@ enum PyUnicode_Kind {
436437
Py_DEPRECATED(3.3)
437438
static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op) {
438439
return PyUnicode_IS_COMPACT_ASCII(op) ?
439-
((PyASCIIObject*)op)->length :
440-
((PyCompactUnicodeObject*)op)->wstr_length;
440+
_PyASCIIObject_CAST(op)->length :
441+
_PyCompactUnicodeObject_CAST(op)->wstr_length;
441442
}
442443
#define PyUnicode_WSTR_LENGTH(op) PyUnicode_WSTR_LENGTH(_PyObject_CAST(op))
443444

Include/unicodeobject.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ PyAPI_DATA(PyTypeObject) PyUnicode_Type;
112112
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
113113

114114
#define PyUnicode_Check(op) \
115-
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
115+
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
116116
#define PyUnicode_CheckExact(op) Py_IS_TYPE(op, &PyUnicode_Type)
117117

118118
/* --- Constants ---------------------------------------------------------- */

Modules/_collectionsmodule.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2352,7 +2352,7 @@ _collections__count_elements_impl(PyObject *module, PyObject *mapping,
23522352
break;
23532353

23542354
if (!PyUnicode_CheckExact(key) ||
2355-
(hash = ((PyASCIIObject *) key)->hash) == -1)
2355+
(hash = _PyASCIIObject_CAST(key)->hash) == -1)
23562356
{
23572357
hash = PyObject_Hash(key);
23582358
if (hash == -1)

Objects/dictobject.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ static inline Py_hash_t
286286
unicode_get_hash(PyObject *o)
287287
{
288288
assert(PyUnicode_CheckExact(o));
289-
return ((PyASCIIObject*)o)->hash;
289+
return _PyASCIIObject_CAST(o)->hash;
290290
}
291291

292292
/* Print summary info about the state of the optimized allocator */

Objects/setobject.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ set_add_key(PySetObject *so, PyObject *key)
346346
Py_hash_t hash;
347347

348348
if (!PyUnicode_CheckExact(key) ||
349-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
349+
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
350350
hash = PyObject_Hash(key);
351351
if (hash == -1)
352352
return -1;
@@ -360,7 +360,7 @@ set_contains_key(PySetObject *so, PyObject *key)
360360
Py_hash_t hash;
361361

362362
if (!PyUnicode_CheckExact(key) ||
363-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
363+
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
364364
hash = PyObject_Hash(key);
365365
if (hash == -1)
366366
return -1;
@@ -374,7 +374,7 @@ set_discard_key(PySetObject *so, PyObject *key)
374374
Py_hash_t hash;
375375

376376
if (!PyUnicode_CheckExact(key) ||
377-
(hash = ((PyASCIIObject *) key)->hash) == -1) {
377+
(hash = _PyASCIIObject_CAST(key)->hash) == -1) {
378378
hash = PyObject_Hash(key);
379379
if (hash == -1)
380380
return -1;

Objects/typeobject.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -3759,7 +3759,7 @@ find_name_in_mro(PyTypeObject *type, PyObject *name, int *error)
37593759
{
37603760
Py_hash_t hash;
37613761
if (!PyUnicode_CheckExact(name) ||
3762-
(hash = ((PyASCIIObject *) name)->hash) == -1)
3762+
(hash = _PyASCIIObject_CAST(name)->hash) == -1)
37633763
{
37643764
hash = PyObject_Hash(name);
37653765
if (hash == -1) {
@@ -3853,7 +3853,7 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name)
38533853
struct type_cache_entry *entry = &cache->hashtable[h];
38543854
entry->version = type->tp_version_tag;
38553855
entry->value = res; /* borrowed */
3856-
assert(((PyASCIIObject *)(name))->hash != -1);
3856+
assert(_PyASCIIObject_CAST(name)->hash != -1);
38573857
#if MCACHE_STATS
38583858
if (entry->name != Py_None && entry->name != name) {
38593859
cache->collisions++;
@@ -8951,7 +8951,7 @@ super_init_without_args(_PyInterpreterFrame *cframe, PyCodeObject *co,
89518951
if (cframe->f_lasti >= 0) {
89528952
// MAKE_CELL and COPY_FREE_VARS have no quickened forms, so no need
89538953
// to use _PyOpcode_Deopt here:
8954-
assert(_Py_OPCODE(_PyCode_CODE(co)[0]) == MAKE_CELL ||
8954+
assert(_Py_OPCODE(_PyCode_CODE(co)[0]) == MAKE_CELL ||
89558955
_Py_OPCODE(_PyCode_CODE(co)[0]) == COPY_FREE_VARS);
89568956
assert(PyCell_Check(firstarg));
89578957
firstarg = PyCell_GET(firstarg);

Objects/unicodeobject.c

+27-30
Original file line numberDiff line numberDiff line change
@@ -113,46 +113,46 @@ extern "C" {
113113
#endif
114114

115115
#define _PyUnicode_UTF8(op) \
116-
(((PyCompactUnicodeObject*)(op))->utf8)
116+
(_PyCompactUnicodeObject_CAST(op)->utf8)
117117
#define PyUnicode_UTF8(op) \
118118
(assert(_PyUnicode_CHECK(op)), \
119119
assert(PyUnicode_IS_READY(op)), \
120120
PyUnicode_IS_COMPACT_ASCII(op) ? \
121-
((char*)((PyASCIIObject*)(op) + 1)) : \
121+
((char*)(_PyASCIIObject_CAST(op) + 1)) : \
122122
_PyUnicode_UTF8(op))
123123
#define _PyUnicode_UTF8_LENGTH(op) \
124-
(((PyCompactUnicodeObject*)(op))->utf8_length)
124+
(_PyCompactUnicodeObject_CAST(op)->utf8_length)
125125
#define PyUnicode_UTF8_LENGTH(op) \
126126
(assert(_PyUnicode_CHECK(op)), \
127127
assert(PyUnicode_IS_READY(op)), \
128128
PyUnicode_IS_COMPACT_ASCII(op) ? \
129-
((PyASCIIObject*)(op))->length : \
129+
_PyASCIIObject_CAST(op)->length : \
130130
_PyUnicode_UTF8_LENGTH(op))
131131
#define _PyUnicode_WSTR(op) \
132-
(((PyASCIIObject*)(op))->wstr)
132+
(_PyASCIIObject_CAST(op)->wstr)
133133

134134
/* Don't use deprecated macro of unicodeobject.h */
135135
#undef PyUnicode_WSTR_LENGTH
136136
#define PyUnicode_WSTR_LENGTH(op) \
137-
(PyUnicode_IS_COMPACT_ASCII(op) ? \
138-
((PyASCIIObject*)op)->length : \
139-
((PyCompactUnicodeObject*)op)->wstr_length)
137+
(PyUnicode_IS_COMPACT_ASCII(op) ? \
138+
_PyASCIIObject_CAST(op)->length : \
139+
_PyCompactUnicodeObject_CAST(op)->wstr_length)
140140
#define _PyUnicode_WSTR_LENGTH(op) \
141-
(((PyCompactUnicodeObject*)(op))->wstr_length)
141+
(_PyCompactUnicodeObject_CAST(op)->wstr_length)
142142
#define _PyUnicode_LENGTH(op) \
143-
(((PyASCIIObject *)(op))->length)
143+
(_PyASCIIObject_CAST(op)->length)
144144
#define _PyUnicode_STATE(op) \
145-
(((PyASCIIObject *)(op))->state)
145+
(_PyASCIIObject_CAST(op)->state)
146146
#define _PyUnicode_HASH(op) \
147-
(((PyASCIIObject *)(op))->hash)
147+
(_PyASCIIObject_CAST(op)->hash)
148148
#define _PyUnicode_KIND(op) \
149149
(assert(_PyUnicode_CHECK(op)), \
150-
((PyASCIIObject *)(op))->state.kind)
150+
_PyASCIIObject_CAST(op)->state.kind)
151151
#define _PyUnicode_GET_LENGTH(op) \
152152
(assert(_PyUnicode_CHECK(op)), \
153-
((PyASCIIObject *)(op))->length)
153+
_PyASCIIObject_CAST(op)->length)
154154
#define _PyUnicode_DATA_ANY(op) \
155-
(((PyUnicodeObject*)(op))->data.any)
155+
(_PyUnicodeObject_CAST(op)->data.any)
156156

157157
#undef PyUnicode_READY
158158
#define PyUnicode_READY(op) \
@@ -190,7 +190,7 @@ extern "C" {
190190
buffer where the result characters are written to. */
191191
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
192192
do { \
193-
to_type *_to = (to_type *)(to); \
193+
to_type *_to = (to_type *)(to); \
194194
const from_type *_iter = (const from_type *)(begin);\
195195
const from_type *_end = (const from_type *)(end);\
196196
Py_ssize_t n = (_end) - (_iter); \
@@ -509,21 +509,18 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
509509
#define CHECK(expr) \
510510
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
511511

512-
PyASCIIObject *ascii;
513-
unsigned int kind;
514-
515512
assert(op != NULL);
516513
CHECK(PyUnicode_Check(op));
517514

518-
ascii = (PyASCIIObject *)op;
519-
kind = ascii->state.kind;
515+
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
516+
unsigned int kind = ascii->state.kind;
520517

521518
if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
522519
CHECK(kind == PyUnicode_1BYTE_KIND);
523520
CHECK(ascii->state.ready == 1);
524521
}
525522
else {
526-
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
523+
PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op);
527524
void *data;
528525

529526
if (ascii->state.compact == 1) {
@@ -536,7 +533,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
536533
CHECK(compact->utf8 != data);
537534
}
538535
else {
539-
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
536+
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
540537

541538
data = unicode->data.any;
542539
if (kind == PyUnicode_WCHAR_KIND) {
@@ -1330,18 +1327,18 @@ const void *_PyUnicode_data(void *unicode_raw) {
13301327
printf("obj %p\n", (void*)unicode);
13311328
printf("compact %d\n", PyUnicode_IS_COMPACT(unicode));
13321329
printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode));
1333-
printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1)));
1334-
printf("compact op %p\n", ((void*)((PyCompactUnicodeObject*)(unicode) + 1)));
1330+
printf("ascii op %p\n", (void*)(_PyASCIIObject_CAST(unicode) + 1));
1331+
printf("compact op %p\n", (void*)(_PyCompactUnicodeObject_CAST(unicode) + 1));
13351332
printf("compact data %p\n", _PyUnicode_COMPACT_DATA(unicode));
13361333
return PyUnicode_DATA(unicode);
13371334
}
13381335

13391336
void
13401337
_PyUnicode_Dump(PyObject *op)
13411338
{
1342-
PyASCIIObject *ascii = (PyASCIIObject *)op;
1343-
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
1344-
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
1339+
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
1340+
PyCompactUnicodeObject *compact = _PyCompactUnicodeObject_CAST(op);
1341+
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
13451342
const void *data;
13461343

13471344
if (ascii->state.compact)
@@ -1976,7 +1973,7 @@ unicode_is_singleton(PyObject *unicode)
19761973
return 1;
19771974
}
19781975

1979-
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
1976+
PyASCIIObject *ascii = _PyASCIIObject_CAST(unicode);
19801977
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
19811978
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
19821979
if (ch < 256 && LATIN1(ch) == unicode) {
@@ -16053,7 +16050,7 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
1605316050

1605416051
static void unicode_static_dealloc(PyObject *op)
1605516052
{
16056-
PyASCIIObject* ascii = (PyASCIIObject*)op;
16053+
PyASCIIObject *ascii = _PyASCIIObject_CAST(op);
1605716054

1605816055
assert(ascii->state.compact);
1605916056

0 commit comments

Comments
 (0)