Skip to content

[3.9] bpo-36346: Add Py_DEPRECATED to deprecated unicode APIs (GH-20878) #20932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Doc/whatsnew/3.9.rst
Original file line number Diff line number Diff line change
@@ -1097,6 +1097,12 @@ Porting to Python 3.9
internal C API (``pycore_gc.h``).
(Contributed by Victor Stinner in :issue:`40241`.)

* ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
:c:func:`PyUnicode_FromUnicode`, :c:func:`PyUnicode_AsUnicode`,
``_PyUnicode_AsUnicode``, and :c:func:`PyUnicode_AsUnicodeAndSize` are now
marked as deprecated in C. They have been deprecated by :pep:`393` since
Python 3.3.
(Contributed by Inada Naoki in :issue:`36346`.)

Removed
-------
@@ -1165,3 +1171,8 @@ Removed

* Remove ``_PyUnicode_ClearStaticStrings()`` function.
(Contributed by Victor Stinner in :issue:`39465`.)

* Remove ``Py_UNICODE_MATCH``. It has been deprecated by :pep:`393` and has
been broken since Python 3.3. The :c:func:`PyUnicode_Tailmatch` function can
be used instead.
(Contributed by Inada Naoki in :issue:`36346`.)
44 changes: 22 additions & 22 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
@@ -50,13 +50,17 @@ extern "C" {
Py_UNICODE_ISDIGIT(ch) || \
Py_UNICODE_ISNUMERIC(ch))

/* Copy `length` Py_UNICODE elements from `source` to `target` using memcpy().
   The byte count passed to memcpy() is `length` multiplied by
   sizeof(Py_UNICODE). */
#define Py_UNICODE_COPY(target, source, length) \
memcpy((target), (source), (length)*sizeof(Py_UNICODE))
/* Deprecated (marked Py_DEPRECATED(3.3)): copy `length` Py_UNICODE elements
   from `source` into `target` with memcpy().

   target: destination buffer.
   source: buffer to read from.
   length: number of Py_UNICODE elements (not bytes) to copy. */
Py_DEPRECATED(3.3) static inline void
Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length)
{
    /* memcpy() works in bytes, so scale the element count by the element
       size first. */
    const size_t nbytes = length * sizeof(*target);
    memcpy(target, source, nbytes);
}

/* Store `value` into the first `length` elements of `target`.
   `target` and `value` are captured once in the locals t_ and v_, while
   `length` appears in the loop condition and is therefore re-evaluated on
   every iteration.  The do { ... } while (0) wrapper makes the expansion
   usable as a single statement. */
#define Py_UNICODE_FILL(target, value, length) \
do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
} while (0)
/* Deprecated (marked Py_DEPRECATED(3.3)): set the first `length` elements of
   `target` to `value`.

   target: buffer to write into.
   value:  Py_UNICODE value stored in every slot.
   length: number of elements to write; nothing is written when it is
           zero or negative. */
Py_DEPRECATED(3.3) static inline void
Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length)
{
    Py_UNICODE *cursor = target;
    Py_ssize_t remaining = length;

    /* Test before decrementing so a non-positive count is never modified. */
    while (remaining > 0) {
        *cursor++ = value;
        remaining--;
    }
}

/* macros to work with surrogates */

/* True when `ch` lies in the range 0xD800..0xDFFF, both ends inclusive.
   `ch` appears twice in the expansion, so avoid arguments with side
   effects. */
#define Py_UNICODE_IS_SURROGATE(ch) \
    ((ch) >= 0xD800 && (ch) <= 0xDFFF)
@@ -71,14 +75,6 @@ extern "C" {
/* Low surrogate for `ch`: keep only the bottom 10 bits of `ch` (mask 0x3FF)
   and add them to the base value 0xDC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) \
    (((ch) & 0x3FF) + 0xDC00)

/* Check if substring matches at given offset. The offset must be
   valid, and the substring must not be empty.

   The expansion reads the `wstr` and `wstr_length` fields of both objects
   directly.  It first compares the element at `offset` with the first
   element of `substring`, then the element at
   `offset + substring->wstr_length - 1` with the last element of
   `substring`, and only then runs memcmp() over
   `substring->wstr_length * sizeof(Py_UNICODE)` bytes. */

#define Py_UNICODE_MATCH(string, offset, substring) \
((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))

/* --- Unicode Type ------------------------------------------------------- */

/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
@@ -251,10 +247,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
int check_content);

/* Fast access macros */
/* Evaluates to the `length` field (via a PyASCIIObject cast) when
   PyUnicode_IS_COMPACT_ASCII(op) is true, and to the `wstr_length` field
   (via a PyCompactUnicodeObject cast) otherwise.
   NOTE(review): `op` appears more than once in the expansion and is not
   parenthesized inside the casts, so pass a plain pointer expression. */
#define PyUnicode_WSTR_LENGTH(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((PyASCIIObject*)op)->length : \
((PyCompactUnicodeObject*)op)->wstr_length)

/* Returns the deprecated Py_UNICODE representation's size in code units
(this includes surrogate pairs as 2 units).
@@ -449,6 +441,14 @@ enum PyUnicode_Kind {
(0xffffU) : \
(0x10ffffU)))))

/* Deprecated helper behind PyUnicode_WSTR_LENGTH() (marked
   Py_DEPRECATED(3.3)).

   Returns the `length` field of the object when
   PyUnicode_IS_COMPACT_ASCII(op) is true, and the `wstr_length` field of
   the PyCompactUnicodeObject otherwise. */
Py_DEPRECATED(3.3)
static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
    return PyUnicode_IS_COMPACT_ASCII(op) ?
            ((PyASCIIObject*)op)->length :
            ((PyCompactUnicodeObject*)op)->wstr_length;
}
/* The argument is parenthesized so that the PyObject* cast applies to the
   whole expression supplied by the caller rather than to its first operand
   only (e.g. `cond ? a : b` or `base + i`). */
#define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)(op))

/* === Public API ========================================================= */

/* --- Plain Py_UNICODE --------------------------------------------------- */
@@ -547,7 +547,7 @@ PyAPI_FUNC(void) _PyUnicode_FastFill(
only allowed if u was set to NULL.

The buffer is copied into the new object. */
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
const Py_UNICODE *u, /* Unicode buffer */
Py_ssize_t size /* size of buffer */
);
@@ -576,13 +576,13 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
Py_UNICODE buffer.
If the wchar_t/Py_UNICODE representation is not yet available, this
function will calculate it. */
/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
PyObject *unicode /* Unicode object */
);

/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
contains null characters. */
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
Py_DEPRECATED(3.3) PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
PyObject *unicode /* Unicode object */
);

@@ -591,7 +591,7 @@ PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
If the wchar_t/Py_UNICODE representation is not yet available, this
function will calculate it. */

/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
PyObject *unicode, /* Unicode object */
Py_ssize_t *size /* location where to save the length */
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Mark ``Py_UNICODE_COPY``, ``Py_UNICODE_FILL``, ``PyUnicode_WSTR_LENGTH``,
``PyUnicode_FromUnicode``, ``PyUnicode_AsUnicode``, ``_PyUnicode_AsUnicode``,
and ``PyUnicode_AsUnicodeAndSize`` as deprecated in C. Remove ``Py_UNICODE_MATCH``,
which has been deprecated and broken since Python 3.3.
10 changes: 10 additions & 0 deletions Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
@@ -1668,6 +1668,10 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args)

static volatile int x;

/* Ignore use of deprecated APIs */
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS

/* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
of an error.
*/
@@ -1844,6 +1848,7 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))

Py_RETURN_NONE;
}
_Py_COMP_DIAG_POP

static PyObject *
unicode_aswidechar(PyObject *self, PyObject *args)
@@ -2064,6 +2069,10 @@ unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
return PyUnicode_TransformDecimalToASCII(unicode, length);
}

/* Ignore use of deprecated APIs */
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS

static PyObject *
unicode_legacy_string(PyObject *self, PyObject *args)
{
@@ -2086,6 +2095,7 @@ unicode_legacy_string(PyObject *self, PyObject *args)

return u;
}
_Py_COMP_DIAG_POP

static PyObject *
getargs_w_star(PyObject *self, PyObject *args)
23 changes: 23 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
@@ -120,6 +120,13 @@ extern "C" {
_PyUnicode_UTF8_LENGTH(op))
/* Direct access to the `wstr` field through a PyASCIIObject cast. */
#define _PyUnicode_WSTR(op) \
    (((PyASCIIObject*)(op))->wstr)

/* Don't use deprecated macro of unicodeobject.h.
   This file-local replacement yields the `length` field for compact ASCII
   objects and the `wstr_length` field otherwise.  `op` is parenthesized
   inside the casts, like in the neighbouring macros, so the cast always
   applies to the whole argument expression. */
#undef PyUnicode_WSTR_LENGTH
#define PyUnicode_WSTR_LENGTH(op) \
    (PyUnicode_IS_COMPACT_ASCII(op) ? \
     ((PyASCIIObject*)(op))->length : \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
/* Direct access to the `wstr_length` field through a
   PyCompactUnicodeObject cast (no compact-ASCII check). */
#define _PyUnicode_WSTR_LENGTH(op) \
    (((PyCompactUnicodeObject*)(op))->wstr_length)
#define _PyUnicode_LENGTH(op) \
@@ -964,11 +971,14 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"

_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
#include "stringlib/unicodedefs.h"
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/undef.h"
_Py_COMP_DIAG_POP

/* --- Unicode Object ----------------------------------------------------- */

@@ -4087,6 +4097,11 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
return w;
}

/* Deprecated APIs */

_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS

Py_UNICODE *
PyUnicode_AsUnicode(PyObject *unicode)
{
@@ -4125,6 +4140,8 @@ PyUnicode_GetSize(PyObject *unicode)
return -1;
}

_Py_COMP_DIAG_POP

Py_ssize_t
PyUnicode_GetLength(PyObject *unicode)
{
@@ -12352,6 +12369,8 @@ PyUnicode_IsIdentifier(PyObject *self)
return len && i == len;
}
else {
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
if (len == 0) {
/* an empty string is not a valid identifier */
@@ -12389,6 +12408,7 @@ PyUnicode_IsIdentifier(PyObject *self)
}
}
return 1;
_Py_COMP_DIAG_POP
}
}

@@ -15944,7 +15964,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
PyErr_BadArgument();
return NULL;
}
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
u = PyUnicode_AsUnicodeAndSize(unicode, &len);
_Py_COMP_DIAG_POP
if (u == NULL)
return NULL;
/* Ensure we won't overflow the size. */
4 changes: 4 additions & 0 deletions Python/getargs.c
Original file line number Diff line number Diff line change
@@ -1070,6 +1070,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
case 'u': /* raw unicode buffer (Py_UNICODE *) */
case 'Z': /* raw unicode buffer or None */
{
// TODO: Raise DeprecationWarning
_Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **);

if (*format == '#') {
@@ -1109,6 +1112,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
arg, msgbuf, bufsize);
}
break;
_Py_COMP_DIAG_POP
}

case 'e': {/* encoded string */