Skip to content

Commit 5269c09

Browse files
author
Erlend Egeberg Aasland
authored
bpo-44688: Remove ASCII limitation from sqlite3 collation names (GH-27395)
1 parent 8d06474 commit 5269c09

File tree

7 files changed

+35
-60
lines changed

7 files changed

+35
-60
lines changed

Doc/library/sqlite3.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,10 @@ Connection Objects
402402

403403
con.create_collation("reverse", None)
404404

405+
.. versionchanged:: 3.11
406+
The collation name can contain any Unicode character. Earlier, only
407+
ASCII characters were allowed.
408+
405409

406410
.. method:: interrupt()
407411

Doc/whatsnew/3.11.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,11 @@ sqlite3
213213
:meth:`~sqlite3.Connection.set_authorizer`.
214214
(Contributed by Erlend E. Aasland in :issue:`44491`.)
215215

216+
* The collation name passed to :meth:`~sqlite3.Connection.create_collation` can now
217+
contain any Unicode character. Collation names with invalid characters
218+
now raise :exc:`UnicodeEncodeError` instead of :exc:`sqlite3.ProgrammingError`.
219+
(Contributed by Erlend E. Aasland in :issue:`44688`.)
220+
216221

217222
Removed
218223
=======

Lib/sqlite3/test/hooks.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ def test_create_collation_not_callable(self):
4040

4141
def test_create_collation_not_ascii(self):
4242
con = sqlite.connect(":memory:")
43-
with self.assertRaises(sqlite.ProgrammingError):
44-
con.create_collation("collä", lambda x, y: (x > y) - (x < y))
43+
con.create_collation("collä", lambda x, y: (x > y) - (x < y))
4544

4645
def test_create_collation_bad_upper(self):
4746
class BadUpperStr(str):

Lib/sqlite3/test/regression.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def test_connection_call(self):
278278
def test_collation(self):
279279
def collation_cb(a, b):
280280
return 1
281-
self.assertRaises(sqlite.ProgrammingError, self.con.create_collation,
281+
self.assertRaises(UnicodeEncodeError, self.con.create_collation,
282282
# Lone surrogate cannot be encoded to the default encoding (utf8)
283283
"\uDC80", collation_cb)
284284

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:meth:`sqlite3.Connection.create_collation` now accepts non-ASCII collation
2+
names. Patch by Erlend E. Aasland.

Modules/_sqlite/clinic/connection.c.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -722,13 +722,14 @@ PyDoc_STRVAR(pysqlite_connection_create_collation__doc__,
722722

723723
static PyObject *
724724
pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
725-
PyObject *name, PyObject *callable);
725+
const char *name,
726+
PyObject *callable);
726727

727728
static PyObject *
728729
pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs)
729730
{
730731
PyObject *return_value = NULL;
731-
PyObject *name;
732+
const char *name;
732733
PyObject *callable;
733734

734735
if (!_PyArg_CheckPositional("create_collation", nargs, 2, 2)) {
@@ -738,10 +739,15 @@ pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const
738739
_PyArg_BadArgument("create_collation", "argument 1", "str", args[0]);
739740
goto exit;
740741
}
741-
if (PyUnicode_READY(args[0]) == -1) {
742+
Py_ssize_t name_length;
743+
name = PyUnicode_AsUTF8AndSize(args[0], &name_length);
744+
if (name == NULL) {
745+
goto exit;
746+
}
747+
if (strlen(name) != (size_t)name_length) {
748+
PyErr_SetString(PyExc_ValueError, "embedded null character");
742749
goto exit;
743750
}
744-
name = args[0];
745751
callable = args[1];
746752
return_value = pysqlite_connection_create_collation_impl(self, name, callable);
747753

@@ -811,4 +817,4 @@ pysqlite_connection_exit(pysqlite_Connection *self, PyObject *const *args, Py_ss
811817
#ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
812818
#define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
813819
#endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
814-
/*[clinic end generated code: output=30f11f2d8f09bdf0 input=a9049054013a1b77]*/
820+
/*[clinic end generated code: output=a7a899c4e41381ac input=a9049054013a1b77]*/

Modules/_sqlite/connection.c

Lines changed: 11 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1720,7 +1720,7 @@ pysqlite_connection_backup_impl(pysqlite_Connection *self,
17201720
/*[clinic input]
17211721
_sqlite3.Connection.create_collation as pysqlite_connection_create_collation
17221722
1723-
name: unicode
1723+
name: str
17241724
callback as callable: object
17251725
/
17261726
@@ -1729,61 +1729,26 @@ Creates a collation function. Non-standard.
17291729

17301730
static PyObject *
17311731
pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
1732-
PyObject *name, PyObject *callable)
1733-
/*[clinic end generated code: output=0f63b8995565ae22 input=5c3898813a776cf2]*/
1732+
const char *name,
1733+
PyObject *callable)
1734+
/*[clinic end generated code: output=a4ceaff957fdef9a input=301647aab0f2fb1d]*/
17341735
{
1735-
PyObject* uppercase_name = 0;
1736-
Py_ssize_t i, len;
1737-
_Py_IDENTIFIER(upper);
1738-
const char *uppercase_name_str;
1739-
int rc;
1740-
unsigned int kind;
1741-
const void *data;
1742-
17431736
if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
1744-
goto finally;
1745-
}
1746-
1747-
uppercase_name = _PyObject_CallMethodIdOneArg((PyObject *)&PyUnicode_Type,
1748-
&PyId_upper, name);
1749-
if (!uppercase_name) {
1750-
goto finally;
1751-
}
1752-
1753-
if (PyUnicode_READY(uppercase_name))
1754-
goto finally;
1755-
len = PyUnicode_GET_LENGTH(uppercase_name);
1756-
kind = PyUnicode_KIND(uppercase_name);
1757-
data = PyUnicode_DATA(uppercase_name);
1758-
for (i=0; i<len; i++) {
1759-
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1760-
if ((ch >= '0' && ch <= '9')
1761-
|| (ch >= 'A' && ch <= 'Z')
1762-
|| (ch == '_'))
1763-
{
1764-
continue;
1765-
} else {
1766-
PyErr_SetString(self->ProgrammingError,
1767-
"invalid character in collation name");
1768-
goto finally;
1769-
}
1737+
return NULL;
17701738
}
17711739

1772-
uppercase_name_str = PyUnicode_AsUTF8(uppercase_name);
1773-
if (!uppercase_name_str)
1774-
goto finally;
1775-
1740+
int rc;
17761741
int flags = SQLITE_UTF8;
17771742
if (callable == Py_None) {
1778-
rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags,
1743+
rc = sqlite3_create_collation_v2(self->db, name, flags,
17791744
NULL, NULL, NULL);
17801745
}
17811746
else {
17821747
if (!PyCallable_Check(callable)) {
17831748
PyErr_SetString(PyExc_TypeError, "parameter must be callable");
1784-
goto finally;
1749+
return NULL;
17851750
}
1786-
rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags,
1751+
rc = sqlite3_create_collation_v2(self->db, name, flags,
17871752
Py_NewRef(callable),
17881753
&pysqlite_collation_callback,
17891754
&_destructor);
@@ -1798,16 +1763,10 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
17981763
Py_DECREF(callable);
17991764
}
18001765
_pysqlite_seterror(self->db);
1801-
goto finally;
1802-
}
1803-
1804-
finally:
1805-
Py_XDECREF(uppercase_name);
1806-
1807-
if (PyErr_Occurred()) {
18081766
return NULL;
18091767
}
1810-
return Py_NewRef(Py_None);
1768+
1769+
Py_RETURN_NONE;
18111770
}
18121771

18131772
/*[clinic input]

0 commit comments

Comments
 (0)