From 64290cee7d1fb5e7a7a0ee5d4726ad0037b21f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:31:25 +0100 Subject: [PATCH 1/8] Use `_PyUnicodeError_GetParams` for the 'namereplace' handler --- Python/codecs.c | 163 ++++++++++++++++++++++++------------------------ 1 file changed, 83 insertions(+), 80 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 2cb3875db35058..56f1891617a507 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -960,92 +960,95 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - PyObject *restuple; - PyObject *object; - Py_ssize_t i; - Py_ssize_t start; - Py_ssize_t end; - PyObject *res; - Py_UCS1 *outp; - Py_ssize_t ressize; - int replsize; - Py_UCS4 c; - char buffer[256]; /* NAME_MAXLEN */ - if (PyUnicodeEncodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeEncodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeEncodeError_GetObject(exc))) - return NULL; - _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); - if (ucnhash_capi == NULL) { - return NULL; + if (!PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + wrong_exception_type(exc); + return NULL; + } + + _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); + if (ucnhash_capi == NULL) { + return NULL; + } + + PyObject *obj; + Py_ssize_t start, end; + if (_PyUnicodeError_GetParams(exc, + &obj, NULL, + &start, &end, NULL, false) < 0) + { + return NULL; + } + + char buffer[256]; /* NAME_MAXLEN */ + Py_ssize_t i = start, ressize = 0, replsize; + for (; i < end; ++i) { + Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); + if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { + // failures of 'getname()' are ignored by this handler + replsize = 1 + 1 + 1 + (int)strlen(buffer) + 1; } - for (i = start, ressize = 0; i < end; ++i) { - /* object is guaranteed to be "ready" */ - c = PyUnicode_READ_CHAR(object, i); - if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { - replsize = 1+1+1+(int)strlen(buffer)+1; - } - else if (c >= 0x10000) { - replsize = 1+1+8; - } - else if (c >= 0x100) { - replsize = 1+1+4; - } - else - replsize = 1+1+2; - if (ressize > PY_SSIZE_T_MAX - replsize) - break; - ressize += replsize; + else if (c >= 0x10000) { + replsize = 1 + 1 + 8; } - end = i; - res = PyUnicode_New(ressize, 127); - if (res==NULL) - return NULL; - for (i = start, outp = PyUnicode_1BYTE_DATA(res); - i < end; ++i) { - c = PyUnicode_READ_CHAR(object, i); - *outp++ = '\\'; - if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { - *outp++ = 'N'; - *outp++ = '{'; - strcpy((char *)outp, buffer); - outp += strlen(buffer); - *outp++ = '}'; - continue; - } - if (c >= 0x00010000) { - *outp++ = 'U'; - *outp++ = Py_hexdigits[(c>>28)&0xf]; - *outp++ = Py_hexdigits[(c>>24)&0xf]; - *outp++ = Py_hexdigits[(c>>20)&0xf]; - *outp++ = Py_hexdigits[(c>>16)&0xf]; - *outp++ = Py_hexdigits[(c>>12)&0xf]; - *outp++ = Py_hexdigits[(c>>8)&0xf]; - } - else if (c >= 0x100) { - *outp++ = 'u'; - *outp++ = Py_hexdigits[(c>>12)&0xf]; - *outp++ = Py_hexdigits[(c>>8)&0xf]; - } - else - *outp++ = 'x'; - *outp++ = Py_hexdigits[(c>>4)&0xf]; - *outp++ = Py_hexdigits[c&0xf]; + else if (c >= 0x100) { + replsize = 1 + 1 + 4; } - - assert(outp == PyUnicode_1BYTE_DATA(res) + ressize); - assert(_PyUnicode_CheckConsistency(res, 1)); - restuple = Py_BuildValue("(Nn)", res, end); - Py_DECREF(object); - return restuple; + else { + replsize = 1 + 1 + 2; + } + if (ressize > PY_SSIZE_T_MAX - replsize) { + break; + } + ressize += replsize; } - else { - wrong_exception_type(exc); + + end = i; + PyObject *res = PyUnicode_New(ressize, 127); + if (res == NULL) { + Py_DECREF(obj); return NULL; } + + Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); + for (Py_ssize_t i = start; i < end; ++i) { + Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); + *outp++ = '\\'; + if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { + // failures of 'getname()' are ignored by this handler + *outp++ = 'N'; + *outp++ = '{'; + (void)strcpy((char *)outp, buffer); + outp += strlen(buffer); + *outp++ = '}'; + continue; + } + + if (c >= 0x00010000) { + *outp++ = 'U'; + *outp++ = Py_hexdigits[(c >> 28) & 0xf]; + *outp++ = Py_hexdigits[(c >> 24) & 0xf]; + *outp++ = Py_hexdigits[(c >> 20) & 0xf]; + *outp++ = Py_hexdigits[(c >> 16) & 0xf]; + *outp++ = Py_hexdigits[(c >> 12) & 0xf]; + *outp++ = Py_hexdigits[(c >> 8) & 0xf]; + } + else if (c >= 0x100) { + *outp++ = 'u'; + *outp++ = Py_hexdigits[(c >> 12) & 0xf]; + *outp++ = Py_hexdigits[(c >> 8) & 0xf]; + } + else { + *outp++ = 'x'; + } + *outp++ = Py_hexdigits[(c >> 4) & 0xf]; + *outp++ = Py_hexdigits[c & 0xf]; + } + + assert(outp == PyUnicode_1BYTE_DATA(res) + ressize); + assert(_PyUnicode_CheckConsistency(res, 1)); + PyObject *restuple = Py_BuildValue("(Nn)", res, end); + Py_DECREF(obj); + return restuple; } #define ENC_UNKNOWN -1 From a8880d1c4ee352e8b83367f3acb3354a52e6485b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:41:44 +0100 Subject: [PATCH 2/8] post-merge --- Python/codecs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 0b0a89eb871853..c244a1bc1eb44a 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -988,9 +988,12 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } + +// --- handler: 'namereplace' ------------------------------------------------- + PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { - if (!PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + if (!_PyIsUnicodeEncodeError(exc)) { wrong_exception_type(exc); return NULL; } @@ -1012,9 +1015,13 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) char buffer[256]; /* NAME_MAXLEN */ Py_ssize_t i = start, ressize = 0, replsize; for (; i < end; ++i) { + // If 'c' is recognized by getname(), the corresponding replacement + // is '\\' + 'U' + '{' + NAME + '}', namely 1 + 1 + 1 + len(NAME) + 1 + // characters. Otherwise, the replacement is obtained similarly as + // in PyCodec_BackslashReplaceErrors(). Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { - // failures of 'getname()' are ignored by this handler + // failures of 'getname()' are ignored by the handler replsize = 1 + 1 + 1 + (int)strlen(buffer) + 1; } else if (c >= 0x10000) { @@ -1044,7 +1051,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); *outp++ = '\\'; if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { - // failures of 'getname()' are ignored by this handler + // failures of 'getname()' are ignored by the handler *outp++ = 'N'; *outp++ = '{'; (void)strcpy((char *)outp, buffer); From 9d9909704ecae21fe316e3efb947db5f45d89ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 24 Jan 2025 12:42:17 +0100 Subject: [PATCH 3/8] extract some logic --- Python/codecs.c | 141 ++++++++++++++++++++++++------------------------ 1 file changed, 70 insertions(+), 71 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index c244a1bc1eb44a..222e6d970b4368 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -676,6 +676,60 @@ wrong_exception_type(PyObject *exc) PyObject_TypeCheck(EXC, (PyTypeObject *)PyExc_UnicodeTranslateError) +// --- codecs handlers: utilities --------------------------------------------- + +/* + * Return the number of characters (including special prefixes) + * needed to represent 'ch' by _codec_handler_write_unicode_hex(). + */ +static inline Py_ssize_t +_codec_handler_unicode_hex_width(Py_UCS4 ch) +{ + if (ch >= 0x10000) { + // format: '\\' + 'U' + 8 hex digits + return 1 + 1 + 8; + } + else if (ch >= 0x100) { + // format: '\\' + 'u' + 4 hex digits + return 1 + 1 + 4; + } + else { + // format: '\\' + 'x' + 2 hex digits + return 1 + 1 + 2; + } +} + + +/* + * Write the hexadecimal representation of 'ch' to the buffer pointed by 'p' + * using 2, 4, or 8 characters prefixed by '\x', '\u', or '\U' respectively. + */ +static inline void +_codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch) +{ + *(*p)++ = '\\'; + if (ch >= 0x10000) { + *(*p)++ = 'U'; + *(*p)++ = Py_hexdigits[(ch >> 28) & 0xf]; + *(*p)++ = Py_hexdigits[(ch >> 24) & 0xf]; + *(*p)++ = Py_hexdigits[(ch >> 20) & 0xf]; + *(*p)++ = Py_hexdigits[(ch >> 16) & 0xf]; + *(*p)++ = Py_hexdigits[(ch >> 12) & 0xf]; + *(*p)++ = Py_hexdigits[(ch >> 8) & 0xf]; + } + else if (ch >= 0x100) { + *(*p)++ = 'u'; + *(*p)++ = Py_hexdigits[(ch >> 12) & 0xf]; + *(*p)++ = Py_hexdigits[(ch >> 8) & 0xf]; + } + else { + *(*p)++ = 'x'; + } + *(*p)++ = Py_hexdigits[(ch >> 4) & 0xf]; + *(*p)++ = Py_hexdigits[ch & 0xf]; +} + + // --- handler: 'strict' ------------------------------------------------------ PyObject *PyCodec_StrictErrors(PyObject *exc) @@ -942,17 +996,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) Py_ssize_t ressize = 0; for (Py_ssize_t i = start; i < end; ++i) { - /* object is guaranteed to be "ready" */ Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); - if (c >= 0x10000) { - ressize += 1 + 1 + 8; - } - else if (c >= 0x100) { - ressize += 1 + 1 + 4; - } - else { - ressize += 1 + 1 + 2; - } + ressize += _codec_handler_unicode_hex_width(c); } PyObject *res = PyUnicode_New(ressize, 127); if (res == NULL) { @@ -962,26 +1007,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); for (Py_ssize_t i = start; i < end; ++i) { Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); - *outp++ = '\\'; - if (c >= 0x00010000) { - *outp++ = 'U'; - *outp++ = Py_hexdigits[(c >> 28) & 0xf]; - *outp++ = Py_hexdigits[(c >> 24) & 0xf]; - *outp++ = Py_hexdigits[(c >> 20) & 0xf]; - *outp++ = Py_hexdigits[(c >> 16) & 0xf]; - *outp++ = Py_hexdigits[(c >> 12) & 0xf]; - *outp++ = Py_hexdigits[(c >> 8) & 0xf]; - } - else if (c >= 0x100) { - *outp++ = 'u'; - *outp++ = Py_hexdigits[(c >> 12) & 0xf]; - *outp++ = Py_hexdigits[(c >> 8) & 0xf]; - } - else { - *outp++ = 'x'; - } - *outp++ = Py_hexdigits[(c >> 4) & 0xf]; - *outp++ = Py_hexdigits[c & 0xf]; + _codec_handler_write_unicode_hex(&outp, c); } assert(_PyUnicode_CheckConsistency(res, 1)); Py_DECREF(obj); @@ -1012,26 +1038,18 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; } - char buffer[256]; /* NAME_MAXLEN */ - Py_ssize_t i = start, ressize = 0, replsize; - for (; i < end; ++i) { - // If 'c' is recognized by getname(), the corresponding replacement - // is '\\' + 'U' + '{' + NAME + '}', namely 1 + 1 + 1 + len(NAME) + 1 - // characters. Otherwise, the replacement is obtained similarly as - // in PyCodec_BackslashReplaceErrors(). - Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); + char buffer[256]; /* NAME_MAXLEN in unicodename_db.h */ + Py_ssize_t imax = start, ressize = 0, replsize; + for (; imax < end; ++imax) { + Py_UCS4 c = PyUnicode_READ_CHAR(obj, imax); if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { - // failures of 'getname()' are ignored by the handler - replsize = 1 + 1 + 1 + (int)strlen(buffer) + 1; - } - else if (c >= 0x10000) { - replsize = 1 + 1 + 8; - } - else if (c >= 0x100) { - replsize = 1 + 1 + 4; + // If 'c' is recognized by getname(), the corresponding replacement + // is '\\' + 'U' + '{' + NAME + '}', i.e. 1 + 1 + 1 + len(NAME) + 1 + // characters. Failures of 'getname()' are ignored by the handler. + replsize = 1 + 1 + 1 + strlen(buffer) + 1; } else { - replsize = 1 + 1 + 2; + replsize = _codec_handler_unicode_hex_width(c); } if (ressize > PY_SSIZE_T_MAX - replsize) { break; @@ -1039,7 +1057,6 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) ressize += replsize; } - end = i; PyObject *res = PyUnicode_New(ressize, 127); if (res == NULL) { Py_DECREF(obj); @@ -1047,47 +1064,29 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) } Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); - for (Py_ssize_t i = start; i < end; ++i) { + for (Py_ssize_t i = start; i < imax; ++i) { Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); - *outp++ = '\\'; if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { - // failures of 'getname()' are ignored by the handler + *outp++ = '\\'; *outp++ = 'N'; *outp++ = '{'; (void)strcpy((char *)outp, buffer); outp += strlen(buffer); *outp++ = '}'; - continue; - } - - if (c >= 0x00010000) { - *outp++ = 'U'; - *outp++ = Py_hexdigits[(c >> 28) & 0xf]; - *outp++ = Py_hexdigits[(c >> 24) & 0xf]; - *outp++ = Py_hexdigits[(c >> 20) & 0xf]; - *outp++ = Py_hexdigits[(c >> 16) & 0xf]; - *outp++ = Py_hexdigits[(c >> 12) & 0xf]; - *outp++ = Py_hexdigits[(c >> 8) & 0xf]; - } - else if (c >= 0x100) { - *outp++ = 'u'; - *outp++ = Py_hexdigits[(c >> 12) & 0xf]; - *outp++ = Py_hexdigits[(c >> 8) & 0xf]; } else { - *outp++ = 'x'; + _codec_handler_write_unicode_hex(&outp, c); } - *outp++ = Py_hexdigits[(c >> 4) & 0xf]; - *outp++ = Py_hexdigits[c & 0xf]; } assert(outp == PyUnicode_1BYTE_DATA(res) + ressize); assert(_PyUnicode_CheckConsistency(res, 1)); - PyObject *restuple = Py_BuildValue("(Nn)", res, end); + PyObject *restuple = Py_BuildValue("(Nn)", res, imax); Py_DECREF(obj); return restuple; } + #define ENC_UNKNOWN -1 #define ENC_UTF8 0 #define ENC_UTF16BE 1 From 3e2a7c20ed3ba38fd36fb6aed96f17c5bfac029f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:30:36 +0100 Subject: [PATCH 4/8] markup fixup --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 222e6d970b4368..2612e676d892ed 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1045,7 +1045,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { // If 'c' is recognized by getname(), the corresponding replacement // is '\\' + 'U' + '{' + NAME + '}', i.e. 1 + 1 + 1 + len(NAME) + 1 - // characters. Failures of 'getname()' are ignored by the handler. + // characters. Failures of getname() are ignored by the handler. replsize = 1 + 1 + 1 + strlen(buffer) + 1; } else { From a7906691ba34f28ca9ced6029337ccfccb215815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:48:15 +0100 Subject: [PATCH 5/8] use public names --- Python/codecs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 2612e676d892ed..43d0afe9ca4447 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -683,7 +683,7 @@ wrong_exception_type(PyObject *exc) * needed to represent 'ch' by _codec_handler_write_unicode_hex(). */ static inline Py_ssize_t -_codec_handler_unicode_hex_width(Py_UCS4 ch) +codec_handler_unicode_hex_width(Py_UCS4 ch) { if (ch >= 0x10000) { // format: '\\' + 'U' + 8 hex digits @@ -705,7 +705,7 @@ _codec_handler_unicode_hex_width(Py_UCS4 ch) * using 2, 4, or 8 characters prefixed by '\x', '\u', or '\U' respectively. */ static inline void -_codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch) +codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch) { *(*p)++ = '\\'; if (ch >= 0x10000) { @@ -997,7 +997,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) Py_ssize_t ressize = 0; for (Py_ssize_t i = start; i < end; ++i) { Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); - ressize += _codec_handler_unicode_hex_width(c); + ressize += codec_handler_unicode_hex_width(c); } PyObject *res = PyUnicode_New(ressize, 127); if (res == NULL) { @@ -1007,7 +1007,7 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); for (Py_ssize_t i = start; i < end; ++i) { Py_UCS4 c = PyUnicode_READ_CHAR(obj, i); - _codec_handler_write_unicode_hex(&outp, c); + codec_handler_write_unicode_hex(&outp, c); } assert(_PyUnicode_CheckConsistency(res, 1)); Py_DECREF(obj); @@ -1049,7 +1049,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) replsize = 1 + 1 + 1 + strlen(buffer) + 1; } else { - replsize = _codec_handler_unicode_hex_width(c); + replsize = codec_handler_unicode_hex_width(c); } if (ressize > PY_SSIZE_T_MAX - replsize) { break; @@ -1075,7 +1075,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) *outp++ = '}'; } else { - _codec_handler_write_unicode_hex(&outp, c); + codec_handler_write_unicode_hex(&outp, c); } } From 578a8f82b3565e7284020581b892342750917ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:48:37 +0100 Subject: [PATCH 6/8] use public names --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 43d0afe9ca4447..33cf1ab87bb57d 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -680,7 +680,7 @@ wrong_exception_type(PyObject *exc) /* * Return the number of characters (including special prefixes) - * needed to represent 'ch' by _codec_handler_write_unicode_hex(). + * needed to represent 'ch' by codec_handler_write_unicode_hex(). */ static inline Py_ssize_t codec_handler_unicode_hex_width(Py_UCS4 ch) From 0f60651c4303d45b5421bed4e68560a37be6ea1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 24 Jan 2025 13:49:51 +0100 Subject: [PATCH 7/8] post-merge --- Python/codecs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 33cf1ab87bb57d..df2f7010de2d96 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1430,11 +1430,14 @@ static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc) return PyCodec_BackslashReplaceErrors(exc); } -static PyObject *namereplace_errors(PyObject *self, PyObject *exc) + +static inline PyObject * +namereplace_errors(PyObject *Py_UNUSED(self), PyObject *exc) { return PyCodec_NameReplaceErrors(exc); } + static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc) { return PyCodec_SurrogatePassErrors(exc); From b2c5dddd8a92a21351cf1ae488bb72d3f87ed669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 8 Feb 2025 14:43:06 +0100 Subject: [PATCH 8/8] fix typo Co-authored-by: Petr Viktorin --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index df2f7010de2d96..6c9f8222079ec8 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1044,7 +1044,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) Py_UCS4 c = PyUnicode_READ_CHAR(obj, imax); if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) { // If 'c' is recognized by getname(), the corresponding replacement - // is '\\' + 'U' + '{' + NAME + '}', i.e. 1 + 1 + 1 + len(NAME) + 1 + // is '\\' + 'N' + '{' + NAME + '}', i.e. 1 + 1 + 1 + len(NAME) + 1 // characters. Failures of getname() are ignored by the handler. replsize = 1 + 1 + 1 + strlen(buffer) + 1; }