BUG: Series constructor overflowing for UInt64 (#50757)

phofl · web-flow · commit 9bf60aa71651 · 2023-01-18T15:58:07.000+01:00
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -1086,6 +1086,7 @@ ExtensionArray
 - Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`)
 - Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`)
 - Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`)
+- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
 
 Styler
 ^^^^^^
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
@@ -168,6 +168,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
             mask = mask.copy()
         return values, mask, dtype, inferred_type
 
+    original = values
     values = np.array(values, copy=copy)
     inferred_type = None
     if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
@@ -204,6 +205,22 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
     else:
         dtype = dtype.type
 
+    if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
+        if mask.all():
+            values = np.ones(values.shape, dtype=dtype)
+        else:
+            idx = np.nanargmax(values)
+            if int(values[idx]) != original[idx]:
+                # We have ints that lost precision during the cast.
+                inferred_type = lib.infer_dtype(original, skipna=True)
+                if (
+                    inferred_type not in ["floating", "mixed-integer-float"]
+                    and not mask.any()
+                ):
+                    values = np.array(original, dtype=dtype, copy=False)
+                else:
+                    values = np.array(original, dtype="object", copy=False)
+
     # we copy as need to coerce here
     if mask.any():
         values = values.copy()
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -45,6 +45,7 @@
 import pandas._testing as tm
 from pandas.core.api import Int64Index
 from pandas.core.arrays import (
+    IntegerArray,
     IntervalArray,
     period_array,
 )
@@ -2000,6 +2001,50 @@ def test_series_constructor_ea_int_from_string_bool(self):
         with pytest.raises(ValueError, match="invalid literal"):
             Series(["True", "False", "True", pd.NA], dtype="Int64")
 
+    @pytest.mark.parametrize("val", [1, 1.0])
+    def test_series_constructor_overflow_uint_ea(self, val):
+        # GH#38798
+        max_val = np.iinfo(np.uint64).max - 1
+        result = Series([max_val, val], dtype="UInt64")
+        expected = Series(np.array([max_val, 1], dtype="uint64"), dtype="UInt64")
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("val", [1, 1.0])
+    def test_series_constructor_overflow_uint_ea_with_na(self, val):
+        # GH#38798
+        max_val = np.iinfo(np.uint64).max - 1
+        result = Series([max_val, val, pd.NA], dtype="UInt64")
+        expected = Series(
+            IntegerArray(
+                np.array([max_val, 1, 0], dtype="uint64"),
+                np.array([0, 0, 1], dtype=np.bool_),
+            )
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_series_constructor_overflow_uint_with_nan(self):
+        # GH#38798
+        max_val = np.iinfo(np.uint64).max - 1
+        result = Series([max_val, np.nan], dtype="UInt64")
+        expected = Series(
+            IntegerArray(
+                np.array([max_val, 1], dtype="uint64"),
+                np.array([0, 1], dtype=np.bool_),
+            )
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_series_constructor_ea_all_na(self):
+        # GH#38798
+        result = Series([np.nan, np.nan], dtype="UInt64")
+        expected = Series(
+            IntegerArray(
+                np.array([1, 1], dtype="uint64"),
+                np.array([1, 1], dtype=np.bool_),
+            )
+        )
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -4,8 +4,6 @@
 from numpy import iinfo
 import pytest
 
-from pandas.compat import is_platform_arm
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -755,13 +753,7 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
         ([1.0, 1.1], "Float64", "signed", "Float64"),
         ([1, pd.NA], "Int64", "signed", "Int8"),
         ([450, -300], "Int64", "signed", "Int16"),
-        pytest.param(
-            [np.iinfo(np.uint64).max - 1, 1],
-            "UInt64",
-            "signed",
-            "UInt64",
-            marks=pytest.mark.xfail(not is_platform_arm(), reason="GH38798"),
-        ),
+        ([np.iinfo(np.uint64).max - 1, 1], "UInt64", "signed", "UInt64"),
         ([1, 1], "Int64", "unsigned", "UInt8"),
         ([1.0, 1.0], "Float32", "unsigned", "UInt8"),
         ([1.0, 1.1], "Float64", "unsigned", "Float64"),