# Patch commentary (not part of any hunk; this text sits ahead of the first "diff --git" header).
#
# NOTE(review): the line breaks of this patch appear to have been collapsed into spaces, so the
# patch text below is kept byte-for-byte as received -- confirm against the upstream commit
# before trying to apply it.
#
# Files touched, as shown by the hunks below:
#   doc/source/whatsnew/v0.18.2.txt
#       Adds a "Bug Fixes" entry: DataFrame.to_csv() quoted float values even when quoting was
#       requested for non-numeric values only (issues 12922, 13259).
#   pandas/core/internals.py  (hunk inside to_native_types)
#       Adds an early return taken when float_format is None and decimal == '.':
#       the values are converted with astype(str) when `quoting` is falsy, otherwise copied into
#       an object-dtype array; positions flagged by isnull() are overwritten with na_rep; the
#       result is returned without going through FloatArrayFormatter.
#       NOTE(review): astype(str) presumably yields a fixed-width string array, so a long na_rep
#       assigned through values[mask] may be truncated -- TODO confirm.
#   pandas/formats/format.py
#       Adds a module docstring.
#   pandas/tests/frame/test_to_csv.py
#       Removes the previous test_to_csv_quoting and adds a rewritten one after
#       test_to_csv_with_dst_transitions. The new test checks the default, quoting=None,
#       QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, and QUOTE_NONE (without an escapechar, which
#       must raise csv.Error, and with escapechar '!' and 'f'), and keeps the gh-3503 and
#       gh-7791 checks from the old test.
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index f5dbfd80de7cc..b3ce9911d3f4d 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -388,6 +388,8 @@ Bug Fixes - Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) + +- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 97df81ad6be48..c931adc9a31df 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1529,6 +1529,20 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, if slicer is not None: values = values[:, slicer] + # see gh-13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == '.': + mask = isnull(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype='object') + + values[mask] = na_rep + return values + from pandas.formats.format import FloatArrayFormatter formatter = FloatArrayFormatter(values, na_rep=na_rep, float_format=float_format, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 923ac25f0ebed..a8e184ce94c89 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +""" +Internal module for formatting output data in csv, html, +and latex files. This module also applies to display formatting. 
+""" + from __future__ import print_function from distutils.version import LooseVersion # pylint: disable=W0141 diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index bacf604c491b1..c23702ef46ad2 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -824,35 +824,6 @@ def test_to_csv_float_format(self): index=['A', 'B'], columns=['X', 'Y', 'Z']) assert_frame_equal(rs, xp) - def test_to_csv_quoting(self): - df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']}) - - buf = StringIO() - df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC) - - result = buf.getvalue() - expected = ('"A","B"\n' - '1,"foo"\n' - '2,"bar"\n' - '3,"baz"\n') - - self.assertEqual(result, expected) - - # quoting windows line terminators, presents with encoding? - # #3503 - text = 'a,b,c\n1,"test \r\n",3\n' - df = pd.read_csv(StringIO(text)) - buf = StringIO() - df.to_csv(buf, encoding='utf-8', index=False) - self.assertEqual(buf.getvalue(), text) - - # testing if quoting parameter is passed through with multi-indexes - # related to issue #7791 - df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) - df = df.set_index(['a', 'b']) - expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' - self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected) - def test_to_csv_unicodewriter_quoting(self): df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']}) @@ -1131,3 +1102,83 @@ def test_to_csv_with_dst_transitions(self): df.to_pickle(path) result = pd.read_pickle(path) assert_frame_equal(result, df) + + def test_to_csv_quoting(self): + df = DataFrame({ + 'c_string': ['a', 'b,c'], + 'c_int': [42, np.nan], + 'c_float': [1.0, 3.2], + 'c_bool': [True, False], + }) + + expected = """\ +,c_bool,c_float,c_int,c_string +0,True,1.0,42.0,a +1,False,3.2,,"b,c" +""" + result = df.to_csv() + self.assertEqual(result, expected) + + result = df.to_csv(quoting=None) + self.assertEqual(result, expected) + + result = 
df.to_csv(quoting=csv.QUOTE_MINIMAL) + self.assertEqual(result, expected) + + expected = """\ +"","c_bool","c_float","c_int","c_string" +"0","True","1.0","42.0","a" +"1","False","3.2","","b,c" +""" + result = df.to_csv(quoting=csv.QUOTE_ALL) + self.assertEqual(result, expected) + + # see gh-12922, gh-13259: make sure changes to + # the formatters do not break this behaviour + expected = """\ +"","c_bool","c_float","c_int","c_string" +0,True,1.0,42.0,"a" +1,False,3.2,"","b,c" +""" + result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) + self.assertEqual(result, expected) + + msg = "need to escape, but no escapechar set" + tm.assertRaisesRegexp(csv.Error, msg, df.to_csv, + quoting=csv.QUOTE_NONE) + tm.assertRaisesRegexp(csv.Error, msg, df.to_csv, + quoting=csv.QUOTE_NONE, + escapechar=None) + + expected = """\ +,c_bool,c_float,c_int,c_string +0,True,1.0,42.0,a +1,False,3.2,,b!,c +""" + result = df.to_csv(quoting=csv.QUOTE_NONE, + escapechar='!') + self.assertEqual(result, expected) + + expected = """\ +,c_bool,c_ffloat,c_int,c_string +0,True,1.0,42.0,a +1,False,3.2,,bf,c +""" + result = df.to_csv(quoting=csv.QUOTE_NONE, + escapechar='f') + self.assertEqual(result, expected) + + # see gh-3503: quoting Windows line terminators + # presents with encoding? + text = 'a,b,c\n1,"test \r\n",3\n' + df = pd.read_csv(StringIO(text)) + buf = StringIO() + df.to_csv(buf, encoding='utf-8', index=False) + self.assertEqual(buf.getvalue(), text) + + # xref gh-7791: make sure the quoting parameter is passed through + # with multi-indexes + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + df = df.set_index(['a', 'b']) + expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n' + self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)