Skip to content

Commit 387c9bf

Browse files
Merge pull request #8682 from jorisvandenbossche/sql-8624-categorical
BUG: fix writing of Categorical with to_sql (GH8624)
2 parents f159122 + 2d711e0 commit 387c9bf

File tree

5 files changed

+25
-2
lines changed

5 files changed

+25
-2
lines changed

doc/source/categorical.rst

+1
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ relevant columns back to `category` and assign the right categories and categori
573573
df2.dtypes
574574
df2["cats"]
575575
576+
The same holds for writing to a SQL database with ``to_sql``.
576577

577578
Missing Data
578579
------------

doc/source/io.rst

+8
Original file line numberDiff line numberDiff line change
@@ -3337,6 +3337,14 @@ With some databases, writing large DataFrames can result in errors due to packet
33373337
flavors, columns with type ``timedelta64`` will be written as integer
33383338
values as nanoseconds to the database and a warning will be raised.
33393339

3340+
.. note::
3341+
3342+
Columns of ``category`` dtype will be converted to the dense representation
3343+
as you would get with ``np.asarray(categorical)`` (e.g. for string categories
3344+
this gives an array of strings).
3345+
Because of this, reading the database table back in does **not** generate
3346+
a categorical.
3347+
33403348

33413349
Reading Tables
33423350
~~~~~~~~~~~~~~

doc/source/whatsnew/v0.15.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ Bug Fixes
156156
- Bug in ``Categorical`` not created properly with ``Series.to_frame()`` (:issue:`8626`)
157157
- Bug in coercion in ``astype`` of a ``Categorical`` when passed a ``pd.Categorical`` (this now raises ``TypeError`` correctly) (:issue:`8626`)
158158
- Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`)
159-
159+
- Bug in writing ``Categorical`` columns to an SQL database with ``to_sql`` (:issue:`8624`)
160160

161161

162162

pandas/io/sql.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ def insert_data(self):
670670
# datetime.datetime
671671
d = b.values.astype('M8[us]').astype(object)
672672
else:
673-
d = np.array(b.values, dtype=object)
673+
d = np.array(b.get_values(), dtype=object)
674674

675675
# replace NaN with None
676676
if b._can_hold_na:

pandas/io/tests/test_sql.py

+14
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,20 @@ def test_chunksize_read(self):
678678

679679
tm.assert_frame_equal(res1, res3)
680680

681+
def test_categorical(self):
682+
# GH8624
683+
# test that a categorical column gets written correctly as a dense column
684+
df = DataFrame(
685+
{'person_id': [1, 2, 3],
686+
'person_name': ['John P. Doe', 'Jane Dove', 'John P. Doe']})
687+
df2 = df.copy()
688+
df2['person_name'] = df2['person_name'].astype('category')
689+
690+
df2.to_sql('test_categorical', self.conn, index=False)
691+
res = sql.read_sql_query('SELECT * FROM test_categorical', self.conn)
692+
693+
tm.assert_frame_equal(res, df)
694+
681695

682696
class TestSQLApi(_TestSQLApi):
683697
"""

0 commit comments

Comments
 (0)