From c2e9d90195bbb2b2c0b81a4fb81bbadfcd29db9e Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 30 Aug 2023 14:20:29 +0200 Subject: [PATCH 1/3] gh-108590: Correctly fix sqlite3 iterdump() with invalid Unicode sequences This also reverts 400a1cebc743515e40157ed7af86e48d654290ce. --- Lib/sqlite3/dump.py | 28 +++------------------------- Lib/test/test_sqlite3/test_dump.py | 2 +- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py index 481d605194c7fe..55379c0349ba14 100644 --- a/Lib/sqlite3/dump.py +++ b/Lib/sqlite3/dump.py @@ -7,10 +7,6 @@ # future enhancements, you should normally quote any identifier that # is an English language word, even if you do not have to." - -from contextlib import contextmanager - - def _quote_name(name): return '"{0}"'.format(name.replace('"', '""')) @@ -19,24 +15,6 @@ def _quote_value(value): return "'{0}'".format(value.replace("'", "''")) -def _force_decode(bs, *args, **kwargs): - # gh-108590: Don't fail if the database contains invalid Unicode data. - try: - return bs.decode(*args, **kwargs) - except UnicodeDecodeError: - return "".join([chr(c) for c in bs]) - - -@contextmanager -def _text_factory(con, factory): - saved_factory = con.text_factory - con.text_factory = factory - try: - yield - finally: - con.text_factory = saved_factory - - def _iterdump(connection): """ Returns an iterator to the dump of the database in an SQL text format. @@ -47,6 +25,7 @@ def _iterdump(connection): """ writeable_schema = False + connection.text_factory = lambda x: str(x, errors='surrogateescape') cu = connection.cursor() yield('BEGIN TRANSACTION;') @@ -96,9 +75,8 @@ def _iterdump(connection): ) ) query_res = cu.execute(q) - with _text_factory(connection, bytes): - for row in query_res: - yield("{0};".format(_force_decode(row[0]))) + for row in query_res: + yield("{0};".format(row[0])) # Now when the type is 'index', 'trigger', or 'view' q = """ diff --git a/Lib/test/test_sqlite3/test_dump.py b/Lib/test/test_sqlite3/test_dump.py index 0279ce68eeb5f1..e69bb0ff76640c 100644 --- a/Lib/test/test_sqlite3/test_dump.py +++ b/Lib/test/test_sqlite3/test_dump.py @@ -138,7 +138,7 @@ def test_dump_unicode_invalid(self): expected = [ "BEGIN TRANSACTION;", "CREATE TABLE foo (data TEXT);", - "INSERT INTO \"foo\" VALUES('a\x9f');", + "INSERT INTO \"foo\" VALUES('a\udc9f');", "COMMIT;", ] self.cu.executescript(""" From 4bb45dafaf62f8fe8332e8eddc06a74dfb97f84d Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 30 Aug 2023 14:22:30 +0200 Subject: [PATCH 2/3] Fix NEWS --- .../Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst index 50b41f2a94d9be..3344be770cbf4f 100644 --- a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst +++ b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst @@ -1 +1,3 @@ -Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson +Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an +incomplete SQL dump if a table includes invalid Unicode sequences. +Patch by Corvin McPherson and Serhiy Storchaka. From 3059ad9d73e6dde15bf4908920e024dd9e92a736 Mon Sep 17 00:00:00 2001 From: Corvin McPherson Date: Wed, 30 Aug 2023 16:23:52 +0200 Subject: [PATCH 3/3] Add back context manager --- Lib/sqlite3/dump.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py index 55379c0349ba14..fa968194b6b65c 100644 --- a/Lib/sqlite3/dump.py +++ b/Lib/sqlite3/dump.py @@ -7,6 +7,10 @@ # future enhancements, you should normally quote any identifier that # is an English language word, even if you do not have to." + +from contextlib import contextmanager + + def _quote_name(name): return '"{0}"'.format(name.replace('"', '""')) @@ -15,6 +19,16 @@ def _quote_value(value): return "'{0}'".format(value.replace("'", "''")) +@contextmanager +def _text_factory(con, factory): + saved_factory = con.text_factory + con.text_factory = factory + try: + yield + finally: + con.text_factory = saved_factory + + def _iterdump(connection): """ Returns an iterator to the dump of the database in an SQL text format. @@ -25,7 +39,6 @@ def _iterdump(connection): """ writeable_schema = False - connection.text_factory = lambda x: str(x, errors='surrogateescape') cu = connection.cursor() yield('BEGIN TRANSACTION;') @@ -75,8 +88,9 @@ def _iterdump(connection): ) ) query_res = cu.execute(q) - for row in query_res: - yield("{0};".format(row[0])) + with _text_factory(connection, lambda x: str(x, errors='surrogateescape')): + for row in query_res: + yield("{0};".format(row[0])) # Now when the type is 'index', 'trigger', or 'view' q = """