Skip to content

Commit 610a60c

Browse files
methane, aeros, vstinner
authored
bpo-36346: Add Py_DEPRECATED to deprecated unicode APIs (GH-20878)
Co-authored-by: Kyle Stanley <[email protected]>
Co-authored-by: Victor Stinner <[email protected]>
(cherry picked from commit 2c4928d)
1 parent 9a58f03 commit 610a60c

File tree

6 files changed

+75
-22
lines changed

6 files changed

+75
-22
lines changed

Doc/whatsnew/3.9.rst

+11
Original file line number | Diff line number | Diff line change
@@ -1097,6 +1097,12 @@ Porting to Python 3.9
10971097
internal C API (``pycore_gc.h``).
10981098
(Contributed by Victor Stinner in :issue:`40241`.)
10991099

1100+
* The ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
1101+
:c:func:`PyUnicode_FromUnicode`, :c:func:`PyUnicode_AsUnicode`,
1102+
``_PyUnicode_AsUnicode``, and :c:func:`PyUnicode_AsUnicodeAndSize` are
1103+
marked as deprecated in C. They have been deprecated by :pep:`393` since
1104+
Python 3.3.
1105+
(Contributed by Inada Naoki in :issue:`36346`.)
11001106

11011107
Removed
11021108
-------
@@ -1165,3 +1171,8 @@ Removed
11651171

11661172
* Remove ``_PyUnicode_ClearStaticStrings()`` function.
11671173
(Contributed by Victor Stinner in :issue:`39465`.)
1174+
1175+
* Remove ``Py_UNICODE_MATCH``. It has been deprecated by :pep:`393`, and
1176+
broken since Python 3.3. The :c:func:`PyUnicode_Tailmatch` function can be
1177+
used instead.
1178+
(Contributed by Inada Naoki in :issue:`36346`.)

Include/cpython/unicodeobject.h

+23-22
Original file line number | Diff line number | Diff line change
@@ -50,13 +50,18 @@ extern "C" {
5050
Py_UNICODE_ISDIGIT(ch) || \
5151
Py_UNICODE_ISNUMERIC(ch))
5252

53-
#define Py_UNICODE_COPY(target, source, length) \
54-
memcpy((target), (source), (length)*sizeof(Py_UNICODE))
53+
/* Copy `length` Py_UNICODE elements from `source` to `target`.
   The copy is done with memcpy(), so the two buffers must not overlap.
   Marked Py_DEPRECATED(3.3).
   NOTE(review): `length` is a Py_ssize_t (presumably signed); a negative
   value would turn into a huge byte count in the multiplication below --
   callers are expected to pass length >= 0. */
Py_DEPRECATED(3.3) static inline void
54+
Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
55+
memcpy(target, source, length * sizeof(Py_UNICODE));
56+
}
5557

56-
#define Py_UNICODE_FILL(target, value, length) \
57-
do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
58-
for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
59-
} while (0)
58+
/* Store `value` into each of the first `length` elements of `target`.
   Nothing is written when `length` is zero or negative, because the loop
   condition `i < length` fails on the first check.
   Marked Py_DEPRECATED(3.3). */
Py_DEPRECATED(3.3) static inline void
59+
Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
60+
Py_ssize_t i;
61+
for (i = 0; i < length; i++) {
62+
target[i] = value;
63+
}
64+
}
6065

6166
/* macros to work with surrogates */
6267
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
@@ -71,14 +76,6 @@ extern "C" {
7176
/* low surrogate = bottom 10 bits added to DC00 */
7277
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
7378

74-
/* Check if substring matches at given offset. The offset must be
75-
valid, and the substring must not be empty. */
76-
77-
#define Py_UNICODE_MATCH(string, offset, substring) \
78-
((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
79-
((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
80-
!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
81-
8279
/* --- Unicode Type ------------------------------------------------------- */
8380

8481
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
@@ -251,10 +248,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
251248
int check_content);
252249

253250
/* Fast access macros */
254-
#define PyUnicode_WSTR_LENGTH(op) \
255-
(PyUnicode_IS_COMPACT_ASCII(op) ? \
256-
((PyASCIIObject*)op)->length : \
257-
((PyCompactUnicodeObject*)op)->wstr_length)
258251

259252
/* Returns the deprecated Py_UNICODE representation's size in code units
260253
(this includes surrogate pairs as 2 units).
@@ -449,6 +442,14 @@ enum PyUnicode_Kind {
449442
(0xffffU) : \
450443
(0x10ffffU)))))
451444

445+
/* Return the wstr length stored in `op`: the `length` field of the
   PyASCIIObject when PyUnicode_IS_COMPACT_ASCII(op) is true, otherwise
   the `wstr_length` field of the PyCompactUnicodeObject.
   Inline helper behind the PyUnicode_WSTR_LENGTH() macro; marked
   Py_DEPRECATED(3.3). */
Py_DEPRECATED(3.3)
446+
static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
447+
return PyUnicode_IS_COMPACT_ASCII(op) ?
448+
((PyASCIIObject*)op)->length :
449+
((PyCompactUnicodeObject*)op)->wstr_length;
450+
}
451+
/* Deprecated accessor: cast `op` to PyObject* and forward to the
   Py_DEPRECATED inline helper _PyUnicode_get_wstr_length().
   The argument is parenthesized so that expression arguments
   (e.g. `cond ? a : b`) are cast as a whole rather than piecewise. */
#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)(op))
452+
452453
/* === Public API ========================================================= */
453454

454455
/* --- Plain Py_UNICODE --------------------------------------------------- */
@@ -547,7 +548,7 @@ PyAPI_FUNC(void) _PyUnicode_FastFill(
547548
only allowed if u was set to NULL.
548549
549550
The buffer is copied into the new object. */
550-
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
551+
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
551552
const Py_UNICODE *u, /* Unicode buffer */
552553
Py_ssize_t size /* size of buffer */
553554
);
@@ -576,13 +577,13 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
576577
Py_UNICODE buffer.
577578
If the wchar_t/Py_UNICODE representation is not yet available, this
578579
function will calculate it. */
579-
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
580+
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
580581
PyObject *unicode /* Unicode object */
581582
);
582583

583584
/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
584585
contains null characters. */
585-
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
586+
Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
586587
PyObject *unicode /* Unicode object */
587588
);
588589

@@ -591,7 +592,7 @@ PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
591592
If the wchar_t/Py_UNICODE representation is not yet available, this
592593
function will calculate it. */
593594

594-
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
595+
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
595596
PyObject *unicode, /* Unicode object */
596597
Py_ssize_t *size /* location where to save the length */
597598
);
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
Mark ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
2+
``PyUnicode_FromUnicode``, ``PyUnicode_AsUnicode``, ``_PyUnicode_AsUnicode``,
3+
and ``PyUnicode_AsUnicodeAndSize`` as deprecated in C. Remove ``Py_UNICODE_MATCH``
4+
which was deprecated and broken since Python 3.3.

Modules/_testcapimodule.c

+10
Original file line number | Diff line number | Diff line change
@@ -1668,6 +1668,10 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)
16681668

16691669
static volatile int x;
16701670

1671+
/* Ignore use of deprecated APIs */
1672+
_Py_COMP_DIAG_PUSH
1673+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1674+
16711675
/* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
16721676
of an error.
16731677
*/
@@ -1844,6 +1848,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
18441848

18451849
Py_RETURN_NONE;
18461850
}
1851+
_Py_COMP_DIAG_POP
18471852

18481853
static PyObject *
18491854
unicode_aswidechar(PyObject *self, PyObject *args)
@@ -2064,6 +2069,10 @@ unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
20642069
return PyUnicode_TransformDecimalToASCII(unicode, length);
20652070
}
20662071

2072+
/* Ignore use of deprecated APIs */
2073+
_Py_COMP_DIAG_PUSH
2074+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
2075+
20672076
static PyObject *
20682077
unicode_legacy_string(PyObject *self, PyObject *args)
20692078
{
@@ -2086,6 +2095,7 @@ unicode_legacy_string(PyObject *self, PyObject *args)
20862095

20872096
return u;
20882097
}
2098+
_Py_COMP_DIAG_POP
20892099

20902100
static PyObject *
20912101
getargs_w_star(PyObject *self, PyObject *args)

Objects/unicodeobject.c

+23
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,13 @@ extern "C" {
120120
_PyUnicode_UTF8_LENGTH(op))
121121
#define _PyUnicode_WSTR(op) \
122122
(((PyASCIIObject*)(op))->wstr)
123+
124+
/* Don't use the deprecated PyUnicode_WSTR_LENGTH macro of unicodeobject.h:
   drop that definition and redefine it locally as a plain field access
   (`length` for compact ASCII objects, `wstr_length` otherwise).
   Presumably this keeps uses in this file from going through the
   deprecated helper -- confirm against the header.
   NOTE(review): `op` is not parenthesized inside the two casts below, so
   the argument should be a simple expression. */
125+
#undef PyUnicode_WSTR_LENGTH
126+
#define PyUnicode_WSTR_LENGTH(op) \
127+
(PyUnicode_IS_COMPACT_ASCII(op) ? \
128+
((PyASCIIObject*)op)->length : \
129+
((PyCompactUnicodeObject*)op)->wstr_length)
123130
#define _PyUnicode_WSTR_LENGTH(op) \
124131
(((PyCompactUnicodeObject*)(op))->wstr_length)
125132
#define _PyUnicode_LENGTH(op) \
@@ -964,11 +971,14 @@ ensure_unicode(PyObject *obj)
964971
#include "stringlib/find_max_char.h"
965972
#include "stringlib/undef.h"
966973

974+
_Py_COMP_DIAG_PUSH
975+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
967976
#include "stringlib/unicodedefs.h"
968977
#include "stringlib/fastsearch.h"
969978
#include "stringlib/count.h"
970979
#include "stringlib/find.h"
971980
#include "stringlib/undef.h"
981+
_Py_COMP_DIAG_POP
972982

973983
/* --- Unicode Object ----------------------------------------------------- */
974984

@@ -4087,6 +4097,11 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
40874097
return w;
40884098
}
40894099

4100+
/* Deprecated APIs */
4101+
4102+
_Py_COMP_DIAG_PUSH
4103+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
4104+
40904105
Py_UNICODE *
40914106
PyUnicode_AsUnicode(PyObject *unicode)
40924107
{
@@ -4125,6 +4140,8 @@ PyUnicode_GetSize(PyObject *unicode)
41254140
return -1;
41264141
}
41274142

4143+
_Py_COMP_DIAG_POP
4144+
41284145
Py_ssize_t
41294146
PyUnicode_GetLength(PyObject *unicode)
41304147
{
@@ -12352,6 +12369,8 @@ PyUnicode_IsIdentifier(PyObject *self)
1235212369
return len && i == len;
1235312370
}
1235412371
else {
12372+
_Py_COMP_DIAG_PUSH
12373+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1235512374
Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
1235612375
if (len == 0) {
1235712376
/* an empty string is not a valid identifier */
@@ -12389,6 +12408,7 @@ PyUnicode_IsIdentifier(PyObject *self)
1238912408
}
1239012409
}
1239112410
return 1;
12411+
_Py_COMP_DIAG_POP
1239212412
}
1239312413
}
1239412414

@@ -15944,7 +15964,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
1594415964
PyErr_BadArgument();
1594515965
return NULL;
1594615966
}
15967+
_Py_COMP_DIAG_PUSH
15968+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
1594715969
u = PyUnicode_AsUnicodeAndSize(unicode, &len);
15970+
_Py_COMP_DIAG_POP
1594815971
if (u == NULL)
1594915972
return NULL;
1595015973
/* Ensure we won't overflow the size. */

Python/getargs.c

+4
Original file line number | Diff line number | Diff line change
@@ -1070,6 +1070,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
10701070
case 'u': /* raw unicode buffer (Py_UNICODE *) */
10711071
case 'Z': /* raw unicode buffer or None */
10721072
{
1073+
// TODO: Raise DeprecationWarning
1074+
_Py_COMP_DIAG_PUSH
1075+
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
10731076
Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **);
10741077

10751078
if (*format == '#') {
@@ -1109,6 +1112,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
11091112
arg, msgbuf, bufsize);
11101113
}
11111114
break;
1115+
_Py_COMP_DIAG_POP
11121116
}
11131117

11141118
case 'e': {/* encoded string */

0 commit comments

Comments
 (0)