From 90bcb4d7bb1406b327c08ee0446eb07216988ae3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 23 Oct 2020 10:36:56 +0200 Subject: [PATCH 1/9] add float to convert_dtypes --- pandas/core/dtypes/cast.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0f0e82f4ad4e2..03091e8323ee4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1255,6 +1255,7 @@ def convert_dtypes( convert_string: bool = True, convert_integer: bool = True, convert_boolean: bool = True, + convert_floating: bool = True, ) -> Dtype: """ Convert objects to best possible type, and optionally, @@ -1269,6 +1270,7 @@ def convert_dtypes( Whether, if possible, conversion can be done to integer extension types. convert_boolean : bool, defaults True Whether object dtypes should be converted to ``BooleanDtypes()``. + convert_floating : bool, defaults True Returns ------- @@ -1304,6 +1306,16 @@ def convert_dtypes( if is_integer_dtype(inferred_dtype): inferred_dtype = input_array.dtype + if convert_floating: + if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( + input_array.dtype + ): + arr = input_array[notna(input_array)] + if (arr.astype(int) == arr).all(): + inferred_dtype = "Int64" + else: + inferred_dtype = "Float64" + if convert_boolean: if is_bool_dtype(input_array.dtype): inferred_dtype = "boolean" From 96b9b07810f9c55b9e2758631065e6ba61dde8e1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 23 Oct 2020 20:26:14 +0200 Subject: [PATCH 2/9] fix convert_floating --- pandas/core/dtypes/cast.py | 3 +++ pandas/core/generic.py | 13 +++++++++++-- pandas/core/series.py | 9 +++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 03091e8323ee4..61abfff6c1bc2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1315,6 +1315,9 @@ def convert_dtypes( inferred_dtype = 
"Int64" else: inferred_dtype = "Float64" + else: + if is_float_dtype(inferred_dtype): + inferred_dtype = input_array.dtype if convert_boolean: if is_bool_dtype(input_array.dtype): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c7448cf8f8e40..5619069e49527 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6088,6 +6088,7 @@ def convert_dtypes( convert_string: bool_t = True, convert_integer: bool_t = True, convert_boolean: bool_t = True, + convert_floating: bool_t = True, ) -> FrameOrSeries: """ Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. @@ -6205,12 +6206,20 @@ def convert_dtypes( """ if self.ndim == 1: return self._convert_dtypes( - infer_objects, convert_string, convert_integer, convert_boolean + infer_objects, + convert_string, + convert_integer, + convert_boolean, + convert_floating, ) else: results = [ col._convert_dtypes( - infer_objects, convert_string, convert_integer, convert_boolean + infer_objects, + convert_string, + convert_integer, + convert_boolean, + convert_floating, ) for col_name, col in self.items() ] diff --git a/pandas/core/series.py b/pandas/core/series.py index d493ac0a8c051..1f4221206e5bc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4706,6 +4706,7 @@ def _convert_dtypes( convert_string: bool = True, convert_integer: bool = True, convert_boolean: bool = True, + convert_floating: bool = True, ) -> "Series": input_series = self if infer_objects: @@ -4713,9 +4714,13 @@ def _convert_dtypes( if is_object_dtype(input_series): input_series = input_series.copy() - if convert_string or convert_integer or convert_boolean: + if convert_string or convert_integer or convert_boolean or convert_floating: inferred_dtype = convert_dtypes( - input_series._values, convert_string, convert_integer, convert_boolean + input_series._values, + convert_string, + convert_integer, + convert_boolean, + convert_floating, ) try: result = input_series.astype(inferred_dtype) From 
bf87361e29ad07af813c8981c0327aaec7f7b269 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 17:07:38 +0100 Subject: [PATCH 3/9] TST: rewrite convert_dtypes test to make it easier extendable --- .../series/methods/test_convert_dtypes.py | 399 +++++++----------- 1 file changed, 149 insertions(+), 250 deletions(-) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 8a915324a72c1..7be1d50cd6bac 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -8,272 +8,171 @@ import pandas as pd import pandas._testing as tm +# Each test case consists of a tuple with the data and dtype to create the +# test Series, the default dtype for the expected result (which is valid +# for most cases), and the specific cases where the result deviates from +# this default. Those overrides are defined as a dict with (keyword, val) as +# dictionary key. In case of multiple items, the last override takes precedence. 
+test_cases = [ + ( + # data + [1, 2, 3], + # original dtype + np.dtype("int32"), + # default expected dtype + "Int32", + # exceptions on expected dtype + {("convert_integer", False): np.dtype("int32")}, + ), + ( + [1, 2, 3], + np.dtype("int64"), + "Int64", + {("convert_integer", False): np.dtype("int64")}, + ), + ( + ["x", "y", "z"], + np.dtype("O"), + pd.StringDtype(), + {("convert_string", False): np.dtype("O")}, + ), + ( + [True, False, np.nan], + np.dtype("O"), + pd.BooleanDtype(), + {("convert_boolean", False): np.dtype("O")}, + ), + ( + ["h", "i", np.nan], + np.dtype("O"), + pd.StringDtype(), + {("convert_string", False): np.dtype("O")}, + ), + ( # GH32117 + ["h", "i", 1], + np.dtype("O"), + np.dtype("O"), + {}, + ), + ( + [10, np.nan, 20], + np.dtype("float"), + "Int64", + {("convert_integer", False): np.dtype("float")}, + ), + ([np.nan, 100.5, 200], np.dtype("float"), np.dtype("float"), {}), + ( + [3, 4, 5], + "Int8", + "Int8", + {}, + ), + ( + [[1, 2], [3, 4], [5]], + None, + np.dtype("O"), + {}, + ), + ( + [4, 5, 6], + np.dtype("uint32"), + "UInt32", + {("convert_integer", False): np.dtype("uint32")}, + ), + ( + [-10, 12, 13], + np.dtype("i1"), + "Int8", + {("convert_integer", False): np.dtype("i1")}, + ), + ( + [1, 2.0], + object, + "Int64", + { + ("convert_integer", False): np.dtype("float"), + ("infer_objects", False): np.dtype("object"), + }, + ), + ( + [1, 2.5], + object, + np.dtype("float"), + {("infer_objects", False): np.dtype("object")}, + ), + (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + pd.DatetimeTZDtype(tz="UTC"), + pd.DatetimeTZDtype(tz="UTC"), + {}, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + "datetime64[ns]", + np.dtype("datetime64[ns]"), + {}, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + object, + np.dtype("datetime64[ns]"), + {("infer_objects", False): np.dtype("object")}, + ), + 
(pd.period_range("1/1/2011", freq="M", periods=3), None, pd.PeriodDtype("M"), {}), + ( + pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), + None, + pd.IntervalDtype("int64"), + {}, + ), +] + class TestSeriesConvertDtypes: - # The answerdict has keys that have 4 tuples, corresponding to the arguments - # infer_objects, convert_string, convert_integer, convert_boolean - # This allows all 16 possible combinations to be tested. Since common - # combinations expect the same answer, this provides an easy way to list - # all the possibilities @pytest.mark.parametrize( - "data, maindtype, answerdict", - [ - ( - [1, 2, 3], - np.dtype("int32"), - { - ((True, False), (True, False), (True,), (True, False)): "Int32", - ((True, False), (True, False), (False,), (True, False)): np.dtype( - "int32" - ), - }, - ), - ( - [1, 2, 3], - np.dtype("int64"), - { - ((True, False), (True, False), (True,), (True, False)): "Int64", - ((True, False), (True, False), (False,), (True, False)): np.dtype( - "int64" - ), - }, - ), - ( - ["x", "y", "z"], - np.dtype("O"), - { - ( - (True, False), - (True,), - (True, False), - (True, False), - ): pd.StringDtype(), - ((True, False), (False,), (True, False), (True, False)): np.dtype( - "O" - ), - }, - ), - ( - [True, False, np.nan], - np.dtype("O"), - { - ( - (True, False), - (True, False), - (True, False), - (True,), - ): pd.BooleanDtype(), - ((True, False), (True, False), (True, False), (False,)): np.dtype( - "O" - ), - }, - ), - ( - ["h", "i", np.nan], - np.dtype("O"), - { - ( - (True, False), - (True,), - (True, False), - (True, False), - ): pd.StringDtype(), - ((True, False), (False,), (True, False), (True, False)): np.dtype( - "O" - ), - }, - ), - ( # GH32117 - ["h", "i", 1], - np.dtype("O"), - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): np.dtype("O"), - }, - ), - ( - [10, np.nan, 20], - np.dtype("float"), - { - ((True, False), (True, False), (True,), (True, False)): "Int64", - ((True, False), 
(True, False), (False,), (True, False)): np.dtype( - "float" - ), - }, - ), - ( - [np.nan, 100.5, 200], - np.dtype("float"), - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): np.dtype("float"), - }, - ), - ( - [3, 4, 5], - "Int8", - {((True, False), (True, False), (True, False), (True, False)): "Int8"}, - ), - ( - [[1, 2], [3, 4], [5]], - None, - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): np.dtype("O"), - }, - ), - ( - [4, 5, 6], - np.dtype("uint32"), - { - ((True, False), (True, False), (True,), (True, False)): "UInt32", - ((True, False), (True, False), (False,), (True, False)): np.dtype( - "uint32" - ), - }, - ), - ( - [-10, 12, 13], - np.dtype("i1"), - { - ((True, False), (True, False), (True,), (True, False)): "Int8", - ((True, False), (True, False), (False,), (True, False)): np.dtype( - "i1" - ), - }, - ), - ( - [1, 2.0], - object, - { - ((True,), (True, False), (True,), (True, False)): "Int64", - ((True,), (True, False), (False,), (True, False)): np.dtype( - "float" - ), - ((False,), (True, False), (True, False), (True, False)): np.dtype( - "object" - ), - }, - ), - ( - [1, 2.5], - object, - { - ((True,), (True, False), (True, False), (True, False)): np.dtype( - "float" - ), - ((False,), (True, False), (True, False), (True, False)): np.dtype( - "object" - ), - }, - ), - ( - ["a", "b"], - pd.CategoricalDtype(), - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): pd.CategoricalDtype(), - }, - ), - ( - pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), - pd.DatetimeTZDtype(tz="UTC"), - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): pd.DatetimeTZDtype(tz="UTC"), - }, - ), - ( - pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), - "datetime64[ns]", - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): np.dtype("datetime64[ns]"), - }, - ), - ( - pd.to_datetime(["2020-01-14 10:00", "2020-01-15 
11:11"]), - object, - { - ((True,), (True, False), (True, False), (True, False)): np.dtype( - "datetime64[ns]" - ), - ((False,), (True, False), (True, False), (True, False)): np.dtype( - "O" - ), - }, - ), - ( - pd.period_range("1/1/2011", freq="M", periods=3), - None, - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): pd.PeriodDtype("M"), - }, - ), - ( - pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), - None, - { - ( - (True, False), - (True, False), - (True, False), - (True, False), - ): pd.IntervalDtype("int64"), - }, - ), - ], + "data, maindtype, expected_default, expected_other", + test_cases, ) @pytest.mark.parametrize("params", product(*[(True, False)] * 4)) - def test_convert_dtypes(self, data, maindtype, params, answerdict): + def test_convert_dtypes( + self, data, maindtype, params, expected_default, expected_other + ): if maindtype is not None: series = pd.Series(data, dtype=maindtype) else: series = pd.Series(data) - answers = {k: a for (kk, a) in answerdict.items() for k in product(*kk)} - ns = series.convert_dtypes(*params) - expected_dtype = answers[tuple(params)] - expected = pd.Series(series.values, dtype=expected_dtype) - tm.assert_series_equal(ns, expected) + result = series.convert_dtypes(*params) + + params = dict( + zip( + [ + "infer_objects", + "convert_string", + "convert_integer", + "convert_boolean", + ], + params, + ) + ) + + expected_dtype = expected_default + for (key, val), dtype in expected_other.items(): + if params[key] is val: + expected_dtype = dtype + + expected = pd.Series(data, dtype=expected_dtype) + tm.assert_series_equal(result, expected) # Test that it is a copy copy = series.copy(deep=True) - if is_interval_dtype(ns.dtype) and ns.dtype.subtype.kind in ["i", "u"]: + if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]: msg = "Cannot set float NaN to integer-backed IntervalArray" with pytest.raises(ValueError, match=msg): - ns[ns.notna()] = np.nan + 
result[result.notna()] = np.nan else: - ns[ns.notna()] = np.nan + result[result.notna()] = np.nan # Make sure original not changed tm.assert_series_equal(series, copy) From e2e6bdc8d64a0befdfd676f0c5f7a51f2e2d9483 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 17:15:33 +0100 Subject: [PATCH 4/9] formatting --- .../series/methods/test_convert_dtypes.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 7be1d50cd6bac..d44667b258414 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -145,21 +145,17 @@ def test_convert_dtypes( result = series.convert_dtypes(*params) - params = dict( - zip( - [ - "infer_objects", - "convert_string", - "convert_integer", - "convert_boolean", - ], - params, - ) - ) + param_names = [ + "infer_objects", + "convert_string", + "convert_integer", + "convert_boolean", + ] + params_dict = dict(zip(param_names, params)) expected_dtype = expected_default for (key, val), dtype in expected_other.items(): - if params[key] is val: + if params_dict[key] is val: expected_dtype = dtype expected = pd.Series(data, dtype=expected_dtype) From e411e65218b43fe52c04e4302539e70905934e3b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 17:44:35 +0100 Subject: [PATCH 5/9] update tests --- .../series/methods/test_convert_dtypes.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index d44667b258414..6ff061a1b1d11 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -58,9 +58,17 @@ [10, np.nan, 20], np.dtype("float"), "Int64", - {("convert_integer", False): np.dtype("float")}, + { + 
("convert_integer", False): "Float64", + ("convert_integer", False, "convert_floating", False): np.dtype("float"), + }, + ), + ( + [np.nan, 100.5, 200], + np.dtype("float"), + "Float64", + {("convert_floating", False): np.dtype("float")}, ), - ([np.nan, 100.5, 200], np.dtype("float"), np.dtype("float"), {}), ( [3, 4, 5], "Int8", @@ -90,15 +98,19 @@ object, "Int64", { - ("convert_integer", False): np.dtype("float"), + ("convert_integer", False): "Float64", + ("convert_integer", False, "convert_floating", False): np.dtype("float"), ("infer_objects", False): np.dtype("object"), }, ), ( [1, 2.5], object, - np.dtype("float"), - {("infer_objects", False): np.dtype("object")}, + "Float64", + { + ("convert_floating", False): np.dtype("float"), + ("infer_objects", False): np.dtype("object"), + }, ), (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}), ( @@ -134,7 +146,7 @@ class TestSeriesConvertDtypes: "data, maindtype, expected_default, expected_other", test_cases, ) - @pytest.mark.parametrize("params", product(*[(True, False)] * 4)) + @pytest.mark.parametrize("params", product(*[(True, False)] * 5)) def test_convert_dtypes( self, data, maindtype, params, expected_default, expected_other ): @@ -150,12 +162,13 @@ def test_convert_dtypes( "convert_string", "convert_integer", "convert_boolean", + "convert_floating", ] params_dict = dict(zip(param_names, params)) expected_dtype = expected_default - for (key, val), dtype in expected_other.items(): - if params_dict[key] is val: + for spec, dtype in expected_other.items(): + if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])): expected_dtype = dtype expected = pd.Series(data, dtype=expected_dtype) From 031acaa220185c228e86b0f2895db6a528ee7c3e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 17:53:39 +0100 Subject: [PATCH 6/9] fix implementation for integer-like floats --- pandas/core/dtypes/cast.py | 15 +++++++++++---- .../tests/series/methods/test_convert_dtypes.py | 
2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 61abfff6c1bc2..3ece20c1723fe 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1278,7 +1278,9 @@ def convert_dtypes( new dtype """ is_extension = is_extension_array_dtype(input_array.dtype) - if (convert_string or convert_integer or convert_boolean) and not is_extension: + if ( + convert_string or convert_integer or convert_boolean or convert_floating + ) and not is_extension: try: inferred_dtype = lib.infer_dtype(input_array) except ValueError: @@ -1310,9 +1312,14 @@ def convert_dtypes( if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( input_array.dtype ): - arr = input_array[notna(input_array)] - if (arr.astype(int) == arr).all(): - inferred_dtype = "Int64" + # if we could also convert to integer, check if all floats + # are actually integers + if convert_integer: + arr = input_array[notna(input_array)] + if (arr.astype(int) == arr).all(): + inferred_dtype = "Int64" + else: + inferred_dtype = "Float64" else: inferred_dtype = "Float64" else: diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 6ff061a1b1d11..0985c2eb49f94 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -59,7 +59,7 @@ np.dtype("float"), "Int64", { - ("convert_integer", False): "Float64", + ("convert_integer", False, "convert_floating", True): "Float64", ("convert_integer", False, "convert_floating", False): np.dtype("float"), }, ), From bf95155308617889e880d334a268cefecb6e4458 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 17:57:01 +0100 Subject: [PATCH 7/9] update docs --- pandas/core/dtypes/cast.py | 3 +++ pandas/core/generic.py | 24 ++++++++++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/cast.py 
b/pandas/core/dtypes/cast.py index 3ece20c1723fe..5caa628798f14 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1271,6 +1271,9 @@ def convert_dtypes( convert_boolean : bool, defaults True Whether object dtypes should be converted to ``BooleanDtypes()``. convert_floating : bool, defaults True + Whether, if possible, conversion can be done to floating extension types. + If `convert_integer` is also True, preference will be given to integer + dtypes if the floats can be faithfully cast to integers. Returns ------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5619069e49527..c7b6b0cfe8a44 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6105,6 +6105,12 @@ def convert_dtypes( Whether, if possible, conversion can be done to integer extension types. convert_boolean : bool, defaults True Whether object dtypes should be converted to ``BooleanDtypes()``. + convert_floating : bool, defaults True + Whether, if possible, conversion can be done to floating extension types. + If `convert_integer` is also True, preference will be given to integer + dtypes if the floats can be faithfully cast to integers. + + .. versionadded:: 1.2.0 Returns ------- @@ -6122,19 +6128,21 @@ def convert_dtypes( ----- By default, ``convert_dtypes`` will attempt to convert a Series (or each Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options - ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is - possible to turn off individual conversions to ``StringDtype``, the integer - extension types or ``BooleanDtype``, respectively. + ``convert_string``, ``convert_integer``, ``convert_boolean`` and + ``convert_floating``, it is possible to turn off individual conversions + to ``StringDtype``, the integer extension types, ``BooleanDtype`` + or floating extension types, respectively. 
For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference rules as during normal Series/DataFrame construction. Then, if possible, - convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension - type, otherwise leave as ``object``. + convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer + or floating extension type, otherwise leave as ``object``. If the dtype is integer, convert to an appropriate integer extension type. If the dtype is numeric, and consists of all integers, convert to an - appropriate integer extension type. + appropriate integer extension type. Otherwise, convert to an + appropriate floating extension type. In the future, as new dtypes are added that support ``pd.NA``, the results of this method will change to support those new dtypes. @@ -6174,7 +6182,7 @@ def convert_dtypes( >>> dfn = df.convert_dtypes() >>> dfn a b c d e f - 0 1 x True h 10 NaN + 0 1 x True h 10 1 2 y False i 100.5 2 3 z 20 200.0 @@ -6184,7 +6192,7 @@ def convert_dtypes( c boolean d string e Int64 - f float64 + f Float64 dtype: object Start with a Series of strings and missing data represented by ``np.nan``. 
From 3106644352796d97dbb3282200ccedeb863005d1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 18:05:06 +0100 Subject: [PATCH 8/9] test + fix case of float32 as input --- pandas/core/dtypes/cast.py | 9 +++++++-- pandas/tests/series/methods/test_convert_dtypes.py | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5caa628798f14..1947d681e70f4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1315,6 +1315,11 @@ def convert_dtypes( if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( input_array.dtype ): + from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE + + inferred_float_dtype = FLOAT_STR_TO_DTYPE.get( + input_array.dtype.name, "Float64" + ) # if we could also convert to integer, check if all floats # are actually integers if convert_integer: @@ -1322,9 +1327,9 @@ def convert_dtypes( if (arr.astype(int) == arr).all(): inferred_dtype = "Int64" else: - inferred_dtype = "Float64" + inferred_dtype = inferred_float_dtype else: - inferred_dtype = "Float64" + inferred_dtype = inferred_float_dtype else: if is_float_dtype(inferred_dtype): inferred_dtype = input_array.dtype diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 0985c2eb49f94..920182a99e9ef 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -93,6 +93,12 @@ "Int8", {("convert_integer", False): np.dtype("i1")}, ), + ( + [1.2, 1.3], + np.dtype("float32"), + "Float32", + {("convert_floating", False): np.dtype("float32")}, + ), ( [1, 2.0], object, From 693a0a4c37be46dffdac95e6b0fa037e38ed870f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 29 Nov 2020 18:32:27 +0100 Subject: [PATCH 9/9] add note about changed behaviour --- pandas/core/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/pandas/core/generic.py b/pandas/core/generic.py index c7b6b0cfe8a44..c9f862d136477 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6144,6 +6144,10 @@ def convert_dtypes( appropriate integer extension type. Otherwise, convert to an appropriate floating extension type. + .. versionchanged:: 1.2 + Starting with pandas 1.2, this method also converts float columns + to the nullable floating extension type. + In the future, as new dtypes are added that support ``pd.NA``, the results of this method will change to support those new dtypes.