Skip to content

Commit 9bf60aa

Browse files
authored
BUG: Series constructor overflowing for UInt64 (#50757)
1 parent 3f0af5e commit 9bf60aa

File tree

4 files changed: +64 additions, -9 deletions

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1086,6 +1086,7 @@ ExtensionArray
10861086
- Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`)
10871087
- Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`)
10881088
- Bug in :meth:`api.types.is_integer_dtype`, :meth:`api.types.is_unsigned_integer_dtype`, :meth:`api.types.is_signed_integer_dtype`, :meth:`api.types.is_float_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``kind`` returned the corresponding NumPy type (:issue:`50667`)
1089+
- Bug in :class:`Series` constructor unnecessarily overflowing for nullable unsigned integer dtypes (:issue:`38798`, :issue:`25880`)
10891090

10901091
Styler
10911092
^^^^^^

pandas/core/arrays/numeric.py

+17
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
168168
mask = mask.copy()
169169
return values, mask, dtype, inferred_type
170170

171+
original = values
171172
values = np.array(values, copy=copy)
172173
inferred_type = None
173174
if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
@@ -204,6 +205,22 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
204205
else:
205206
dtype = dtype.type
206207

208+
if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
209+
if mask.all():
210+
values = np.ones(values.shape, dtype=dtype)
211+
else:
212+
idx = np.nanargmax(values)
213+
if int(values[idx]) != original[idx]:
214+
# We have ints that lost precision during the cast.
215+
inferred_type = lib.infer_dtype(original, skipna=True)
216+
if (
217+
inferred_type not in ["floating", "mixed-integer-float"]
218+
and not mask.any()
219+
):
220+
values = np.array(original, dtype=dtype, copy=False)
221+
else:
222+
values = np.array(original, dtype="object", copy=False)
223+
207224
# we copy as need to coerce here
208225
if mask.any():
209226
values = values.copy()

pandas/tests/series/test_constructors.py

+45
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@
4545
import pandas._testing as tm
4646
from pandas.core.api import Int64Index
4747
from pandas.core.arrays import (
48+
IntegerArray,
4849
IntervalArray,
4950
period_array,
5051
)
@@ -2000,6 +2001,50 @@ def test_series_constructor_ea_int_from_string_bool(self):
20002001
with pytest.raises(ValueError, match="invalid literal"):
20012002
Series(["True", "False", "True", pd.NA], dtype="Int64")
20022003

2004+
@pytest.mark.parametrize("val", [1, 1.0])
2005+
def test_series_constructor_overflow_uint_ea(self, val):
2006+
# GH#38798
2007+
max_val = np.iinfo(np.uint64).max - 1
2008+
result = Series([max_val, val], dtype="UInt64")
2009+
expected = Series(np.array([max_val, 1], dtype="uint64"), dtype="UInt64")
2010+
tm.assert_series_equal(result, expected)
2011+
2012+
@pytest.mark.parametrize("val", [1, 1.0])
2013+
def test_series_constructor_overflow_uint_ea_with_na(self, val):
2014+
# GH#38798
2015+
max_val = np.iinfo(np.uint64).max - 1
2016+
result = Series([max_val, val, pd.NA], dtype="UInt64")
2017+
expected = Series(
2018+
IntegerArray(
2019+
np.array([max_val, 1, 0], dtype="uint64"),
2020+
np.array([0, 0, 1], dtype=np.bool_),
2021+
)
2022+
)
2023+
tm.assert_series_equal(result, expected)
2024+
2025+
def test_series_constructor_overflow_uint_with_nan(self):
2026+
# GH#38798
2027+
max_val = np.iinfo(np.uint64).max - 1
2028+
result = Series([max_val, np.nan], dtype="UInt64")
2029+
expected = Series(
2030+
IntegerArray(
2031+
np.array([max_val, 1], dtype="uint64"),
2032+
np.array([0, 1], dtype=np.bool_),
2033+
)
2034+
)
2035+
tm.assert_series_equal(result, expected)
2036+
2037+
def test_series_constructor_ea_all_na(self):
2038+
# GH#38798
2039+
result = Series([np.nan, np.nan], dtype="UInt64")
2040+
expected = Series(
2041+
IntegerArray(
2042+
np.array([1, 1], dtype="uint64"),
2043+
np.array([1, 1], dtype=np.bool_),
2044+
)
2045+
)
2046+
tm.assert_series_equal(result, expected)
2047+
20032048

20042049
class TestSeriesConstructorIndexCoercion:
20052050
def test_series_constructor_datetimelike_index_coercion(self):

pandas/tests/tools/test_to_numeric.py

+1 -9
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44
from numpy import iinfo
55
import pytest
66

7-
from pandas.compat import is_platform_arm
8-
97
import pandas as pd
108
from pandas import (
119
DataFrame,
@@ -755,13 +753,7 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
755753
([1.0, 1.1], "Float64", "signed", "Float64"),
756754
([1, pd.NA], "Int64", "signed", "Int8"),
757755
([450, -300], "Int64", "signed", "Int16"),
758-
pytest.param(
759-
[np.iinfo(np.uint64).max - 1, 1],
760-
"UInt64",
761-
"signed",
762-
"UInt64",
763-
marks=pytest.mark.xfail(not is_platform_arm(), reason="GH38798"),
764-
),
756+
([np.iinfo(np.uint64).max - 1, 1], "UInt64", "signed", "UInt64"),
765757
([1, 1], "Int64", "unsigned", "UInt8"),
766758
([1.0, 1.0], "Float32", "unsigned", "UInt8"),
767759
([1.0, 1.1], "Float64", "unsigned", "Float64"),

0 commit comments

Comments
 (0)