From b260e75605d8ad7c0ce31dc9d3e326c967d1612a Mon Sep 17 00:00:00 2001 From: reidy-p Date: Wed, 21 Mar 2018 21:12:38 +0000 Subject: [PATCH 1/2] API: Preserve int columns in to_dict('index') --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 3 ++- pandas/tests/frame/test_convert_to.py | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1d60febe29b4a..c9653f3b7a3bd 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -715,6 +715,7 @@ Other API Changes - :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) - :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) - Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). +- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) .. 
_whatsnew_0230.deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf41737a04ba6..d3eda29eceac8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1102,7 +1102,8 @@ def to_dict(self, orient='dict', into=dict): for k, v in zip(self.columns, np.atleast_1d(row))) for row in self.values] elif orient.lower().startswith('i'): - return into_c((k, v.to_dict(into)) for k, v in self.iterrows()) + return into_c((t[0], dict(zip(self.columns, t[1:]))) + for t in self.itertuples()) else: raise ValueError("orient '%s' not understood" % orient) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 024de8bc13f72..dcaa91123336f 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -5,6 +5,7 @@ import pytest import pytz import collections +from collections import OrderedDict, defaultdict import numpy as np from pandas import compat @@ -288,3 +289,27 @@ def test_frame_to_dict_tz(self): ] tm.assert_dict_equal(result[0], expected[0]) tm.assert_dict_equal(result[1], expected[1]) + + @pytest.mark.parametrize('into, expected', [ + (dict, {0: {'int_col': 1, 'float_col': 1.0}, + 1: {'int_col': 2, 'float_col': 2.0}, + 2: {'int_col': 3, 'float_col': 3.0}}), + (OrderedDict, OrderedDict([(0, {'int_col': 1, 'float_col': 1.0}), + (1, {'int_col': 2, 'float_col': 2.0}), + (2, {'int_col': 3, 'float_col': 3.0})])), + (defaultdict(list), defaultdict(list, + {0: {'int_col': 1, 'float_col': 1.0}, + 1: {'int_col': 2, 'float_col': 2.0}, + 2: {'int_col': 3, 'float_col': 3.0}})) + ]) + def test_to_dict_index_dtypes(self, into, expected): + # GH 18580 + # When using to_dict(orient='index') on a dataframe with only int + # and float columns, the int columns were cast to float + + df = DataFrame({'int_col': [1, 2, 3], + 'float_col': [1.0, 2.0, 3.0]}) + + result = df.to_dict(orient='index', into=into) + tm.assert_frame_equal(DataFrame.from_dict(result, orient='index'), + 
DataFrame.from_dict(expected, orient='index')) From 16e1a08d07e6f74b01a863548fddcba1e9651a7e Mon Sep 17 00:00:00 2001 From: reidy-p Date: Thu, 22 Mar 2018 19:29:45 +0000 Subject: [PATCH 2/2] Ensure column order --- pandas/tests/frame/test_convert_to.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index dcaa91123336f..82dadacd5b1ac 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -311,5 +311,7 @@ def test_to_dict_index_dtypes(self, into, expected): 'float_col': [1.0, 2.0, 3.0]}) result = df.to_dict(orient='index', into=into) - tm.assert_frame_equal(DataFrame.from_dict(result, orient='index'), - DataFrame.from_dict(expected, orient='index')) + cols = ['int_col', 'float_col'] + result = DataFrame.from_dict(result, orient='index')[cols] + expected = DataFrame.from_dict(expected, orient='index')[cols] + tm.assert_frame_equal(result, expected)