Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ Timezones
Numeric
^^^^^^^
- Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`)
- Bug in :meth:`Series.__floordiv__` for :class:`ArrowDtype` with integral dtypes raising for large values (:issue:`56645`)
- Bug in :meth:`Series.pow` not filling missing values correctly (:issue:`55512`)

Conversion
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,12 @@ def cast_for_truediv(
if pa.types.is_integer(arrow_array.type) and pa.types.is_integer(
pa_object.type
):
return arrow_array.cast(pa.float64())
# https://github.com/apache/arrow/issues/35563
# Arrow does not allow safe casting large integral values to float64.
# Use pc.cast instead of arrow_array.cast: in the reflected case
# arrow_array may be a scalar, and scalar cast only gained
# safe=False in pyarrow 13.
return pc.cast(arrow_array, pa.float64(), safe=False)
return arrow_array

def floordiv_compat(
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3133,6 +3133,60 @@ def test_arrow_floordiv():
tm.assert_series_equal(result, expected)


def test_arrow_floordiv_large_values():
    """Floor division of large ``int64[pyarrow]`` values must be exact."""
    # GH 55561
    # Magnitudes around 1.4e18; some entries are deliberately repeated.
    large_values = [
        1425801600000000000,
        1425803400000000000,
        1425805200000000000,
        1425807000000000000,
        1425808800000000000,
        1425801600000000000,
        1425803400000000000,
        1425805200000000000,
        1425807000000000000,
        1446359400000000000,
        1446361200000000000,
        1446363000000000000,
        1446364800000000000,
        1446366600000000000,
        1446364800000000000,
        1446366600000000000,
        1446368400000000000,
        1446370200000000000,
        1446372000000000000,
    ]
    divisor = 1_000_000
    arrow_dtype = "int64[pyarrow]"

    # Python ints are arbitrary precision, so plain floor division gives an
    # exact reference for what the Arrow-backed operation must return.
    exact_quotients = [value // divisor for value in large_values]
    expected = pd.Series(exact_quotients, dtype=arrow_dtype)

    result = pd.Series(large_values, dtype=arrow_dtype) // divisor
    tm.assert_series_equal(result, expected)


def test_string_to_datetime_parsing_cast():
# GH 56266
string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]
Expand Down