From 79528256285496873c07a4d4602a68ee42e11faa Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Thu, 23 Dec 2021 01:25:31 +0530 Subject: [PATCH 01/13] Valueerror --- pandas/core/dtypes/cast.py | 3 +++ pandas/tests/series/test_constructors.py | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b70ea9f816aef..8d8a01da218ee 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2096,6 +2096,9 @@ def maybe_cast_to_integer_array( ) return casted + if all(np.dtype(i) is dtype for i in casted): + return casted + # No known cases that get here, but raising explicitly to cover our bases. raise ValueError(f"values cannot be losslessly cast to {dtype}") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 00a958f58cc93..a627529ccac7c 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1810,6 +1810,18 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) + def test_constructor_int64_dtype(self): + # GH-44923 + result = Series(['1', '2'], dtype='int64') + expected = Series([1, 2]) + tm.assert_series_equal(result, expected) + + def test_constructor_float64_dtype(self): + # GH-44923 + result = Series(['1', '2'], dtype='float64') + expected = Series([1.0, 2.0]) + tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings( "ignore:elementwise comparison failed:DeprecationWarning" ) From 5d0362e78c490c0a3732a963bce486df9c367ec8 Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Thu, 23 Dec 2021 01:27:05 +0530 Subject: [PATCH 02/13] precommit --- pandas/tests/series/test_constructors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a627529ccac7c..1076ddec09f23 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1812,13 +1812,13 @@ def test_constructor_bool_dtype_missing_values(self): def test_constructor_int64_dtype(self): # GH-44923 - result = Series(['1', '2'], dtype='int64') + result = Series(["1", "2"], dtype="int64") expected = Series([1, 2]) tm.assert_series_equal(result, expected) def test_constructor_float64_dtype(self): # GH-44923 - result = Series(['1', '2'], dtype='float64') + result = Series(["1", "2"], dtype="float64") expected = Series([1.0, 2.0]) tm.assert_series_equal(result, expected) From 730d5a9bc8c601c55db8eb63d9f6c16f1058afce Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Thu, 23 Dec 2021 11:44:53 +0530 Subject: [PATCH 03/13] Optimization --- pandas/core/dtypes/cast.py | 2 +- pandas/tests/series/test_constructors.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8d8a01da218ee..9bf609375d83c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2096,7 +2096,7 @@ def maybe_cast_to_integer_array( ) return casted - if all(np.dtype(i) is dtype for i in casted): + if all(isinstance(i, (int, np.integer)) for i in casted): return casted # No known cases that get here, but raising explicitly to cover our bases. diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1076ddec09f23..a86faf12c6d6a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1812,14 +1812,14 @@ def test_constructor_bool_dtype_missing_values(self): def test_constructor_int64_dtype(self): # GH-44923 - result = Series(["1", "2"], dtype="int64") - expected = Series([1, 2]) + result = Series(["0", "1", "2"], dtype="int64") + expected = Series([0, 1, 2]) tm.assert_series_equal(result, expected) def test_constructor_float64_dtype(self): # GH-44923 - result = Series(["1", "2"], dtype="float64") - expected = Series([1.0, 2.0]) + result = Series(["0", "1", "2"], dtype="float64") + expected = Series([0.0, 1.0, 2.0]) tm.assert_series_equal(result, expected) @pytest.mark.filterwarnings( From 204c3c9eec554a91287c932d68dd2fc01bf53bbb Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Fri, 24 Dec 2021 01:33:39 +0530 Subject: [PATCH 04/13] infer_dtype added --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9bf609375d83c..7eef22c15e2d2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2096,7 +2096,7 @@ def maybe_cast_to_integer_array( ) return casted - if all(isinstance(i, (int, np.integer)) for i in casted): + if lib.infer_dtype(casted) is "integer": return casted # No known cases that get here, but raising explicitly to cover our bases. From 814e5ff7cf80bf3053843e02455f8744b9ee5299 Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Fri, 24 Dec 2021 01:35:30 +0530 Subject: [PATCH 05/13] precommit --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7eef22c15e2d2..4651da7d8f07c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2096,7 +2096,7 @@ def maybe_cast_to_integer_array( ) return casted - if lib.infer_dtype(casted) is "integer": + if lib.infer_dtype(casted) == "integer": return casted # No known cases that get here, but raising explicitly to cover our bases. From a76a34a0f942b75991963c38ec09ed469a4209fd Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Mon, 27 Dec 2021 21:43:01 +0530 Subject: [PATCH 06/13] Parametrised --- pandas/tests/series/test_constructors.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a86faf12c6d6a..df4a4457d61d3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1810,10 +1810,11 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) - def test_constructor_int64_dtype(self): + @pytest.mark.parametrize("int_dtype", ["int64"]) + def test_constructor_int64_dtype(self, int_dtype): # GH-44923 - result = Series(["0", "1", "2"], dtype="int64") - expected = Series([0, 1, 2]) + result = Series(["-1", "0", "1", "2"], dtype=int_dtype) + expected = Series([-1, 0, 1, 2]) tm.assert_series_equal(result, expected) def test_constructor_float64_dtype(self): From 8603236673946958c6372afa12c7223f4829d69e Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Fri, 31 Dec 2021 22:05:48 +0530 Subject: [PATCH 07/13] changed to any_dtype --- pandas/tests/series/test_constructors.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index df4a4457d61d3..13fc1811e2226 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1810,16 +1810,17 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("int_dtype", ["int64"]) - def test_constructor_int64_dtype(self, int_dtype): + @pytest.mark.parametrize("any_int_dtype", ["int64"]) + def test_constructor_int64_dtype(self, any_int_dtype): # GH-44923 - result = Series(["-1", "0", "1", "2"], dtype=int_dtype) + result = Series(["-1", "0", "1", "2"], dtype=any_int_dtype) expected = Series([-1, 0, 1, 2]) tm.assert_series_equal(result, expected) - def test_constructor_float64_dtype(self): + @pytest.mark.parametrize("any_float_dtype", ["float64"]) + def test_constructor_float64_dtype(self, any_float_dtype): # GH-44923 - result = Series(["0", "1", "2"], dtype="float64") + result = Series(["0", "1", "2"], dtype=any_float_dtype) expected = Series([0.0, 1.0, 2.0]) tm.assert_series_equal(result, expected) From 40b3872f61fcbf78031debbe34a200fdaac0c35a Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Sat, 1 Jan 2022 11:09:27 +0530 Subject: [PATCH 08/13] Added xfail --- pandas/tests/series/test_constructors.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 13fc1811e2226..21c621aa8e36d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1810,18 +1810,19 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("any_int_dtype", ["int64"]) + @pytest.mark.parametrize("any_int_dtype", ["int", "int8", "int16", "int32", "int64"]) def test_constructor_int64_dtype(self, any_int_dtype): # GH-44923 - result = Series(["-1", "0", "1", "2"], dtype=any_int_dtype) - expected = Series([-1, 0, 1, 2]) + result = Series(["0", "1", "2"], dtype=any_int_dtype) + expected = Series([0, 1, 2], dtype=any_int_dtype) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("any_float_dtype", ["float64"]) def test_constructor_float64_dtype(self, any_float_dtype): # GH-44923 - result = Series(["0", "1", "2"], dtype=any_float_dtype) - expected = Series([0.0, 1.0, 2.0]) + if any_float_dtype in ["Float32", "Float64"]: + pytest.xfail(reason="Cannot be casted to FloatDtype Series") + result = Series(["-1", "0", "1", "2"], dtype=any_float_dtype) + expected = Series([-1.0, 0.0, 1.0, 2.0], dtype=any_float_dtype) tm.assert_series_equal(result, expected) @pytest.mark.filterwarnings( From 13ca6df281c8737400b0a643643f00a917a84ff1 Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Sat, 1 Jan 2022 11:10:59 +0530 Subject: [PATCH 09/13] precommit --- pandas/tests/series/test_constructors.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 21c621aa8e36d..52b42daa0fc8f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1810,7 +1810,9 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("any_int_dtype", ["int", "int8", "int16", "int32", "int64"]) + @pytest.mark.parametrize( + "any_int_dtype", ["int", "int8", "int16", "int32", "int64"] + ) def test_constructor_int64_dtype(self, any_int_dtype): # GH-44923 result = Series(["0", "1", "2"], dtype=any_int_dtype) From c4840da6035700672a0277c8aa8d8115cbc22bc6 Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Tue, 4 Jan 2022 11:34:31 +0530 Subject: [PATCH 10/13] Removed parametrise --- pandas/core/dtypes/cast.py | 9 +++++++-- pandas/tests/series/test_constructors.py | 3 --- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4651da7d8f07c..b6c15ced670d1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2062,8 +2062,13 @@ def maybe_cast_to_integer_array( # doesn't handle `uint64` correctly. arr = np.asarray(arr) - if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): - raise OverflowError("Trying to coerce negative values to unsigned integers") + if is_unsigned_integer_dtype(dtype): + try: + if (arr < 0).any(): + raise OverflowError("Trying to coerce negative values to unsigned integers") + except TypeError as err: + if (casted < 0).any(): + raise OverflowError("Trying to coerce negative values to unsigned integers") if is_float_dtype(arr.dtype): if not np.isfinite(arr).all(): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 52b42daa0fc8f..8256ff90ccf99 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1810,9 +1810,6 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( - "any_int_dtype", ["int", "int8", "int16", "int32", "int64"] - ) def test_constructor_int64_dtype(self, any_int_dtype): # GH-44923 result = Series(["0", "1", "2"], dtype=any_int_dtype) From 55410de1858c226bfdcceeea421f17224ee204e0 Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Tue, 4 Jan 2022 11:40:31 +0530 Subject: [PATCH 11/13] precommit fix --- pandas/core/dtypes/cast.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b6c15ced670d1..7d4931b1b9e32 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2065,10 +2065,14 @@ def maybe_cast_to_integer_array( if is_unsigned_integer_dtype(dtype): try: if (arr < 0).any(): - raise OverflowError("Trying to coerce negative values to unsigned integers") - except TypeError as err: + raise OverflowError( + "Trying to coerce negative values to unsigned integers" + ) + except TypeError: if (casted < 0).any(): - raise OverflowError("Trying to coerce negative values to unsigned integers") + raise OverflowError( + "Trying to coerce negative values to unsigned integers" + ) if is_float_dtype(arr.dtype): if not np.isfinite(arr).all(): From eed91cbf7dcc583036f17a28e1a9c111f1841dfa Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Mon, 17 Jan 2022 19:59:58 +0530 Subject: [PATCH 12/13] removed obvious line --- pandas/core/dtypes/cast.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7d4931b1b9e32..3818d3387f718 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2083,7 +2083,7 @@ def maybe_cast_to_integer_array( if is_object_dtype(arr.dtype): raise ValueError("Trying to coerce float values to integers") - if casted.dtype < arr.dtype: + if casted.dtype < arr.dtype or lib.infer_dtype(casted) < lib.infer_dtype(arr): # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows warnings.warn( f"Values are too large to be losslessly cast to {dtype}. " @@ -2105,9 +2105,6 @@ def maybe_cast_to_integer_array( ) return casted - if lib.infer_dtype(casted) == "integer": - return casted - # No known cases that get here, but raising explicitly to cover our bases. raise ValueError(f"values cannot be losslessly cast to {dtype}") From ae6ea473042993d1d3cd098f10d0758a92889a54 Mon Sep 17 00:00:00 2001 From: shubham11941140 Date: Fri, 11 Feb 2022 21:17:50 +0530 Subject: [PATCH 13/13] Checking string dtype directly --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3818d3387f718..80a127ea4d8e0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2083,7 +2083,7 @@ def maybe_cast_to_integer_array( if is_object_dtype(arr.dtype): raise ValueError("Trying to coerce float values to integers") - if casted.dtype < arr.dtype or lib.infer_dtype(casted) < lib.infer_dtype(arr): + if casted.dtype < arr.dtype or is_string_dtype(arr.dtype): # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows warnings.warn( f"Values are too large to be losslessly cast to {dtype}. "