Skip to content

Commit 5521dc9

Browse files
Backport PR #57314 on branch 2.2.x (BUG: Fix near-minimum timestamp handling) (#57573)
Backport PR #57314: BUG: Fix near-minimum timestamp handling

Co-authored-by: Robert Schmidtke <[email protected]>
1 parent ea56e0c commit 5521dc9

File tree

3 files changed: 32 additions and 4 deletions.

3 files changed: 32 additions and 4 deletions.

doc/source/whatsnew/v2.2.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
~~~~~~~~~~~~~~~~~
2222
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
2323
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
24+
- Fixed regression causing overflow for near-minimum timestamps (:issue:`57150`)
2425
- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
2526
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
2627
- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -482,10 +482,20 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
482482

483483
if (base == NPY_FR_ns) {
484484
int64_t nanoseconds;
485-
PD_CHECK_OVERFLOW(
486-
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
487-
PD_CHECK_OVERFLOW(
488-
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
485+
486+
// Minimum valid timestamp in nanoseconds (1677-09-21 00:12:43.145224193).
487+
const int64_t min_nanoseconds = NPY_MIN_INT64 + 1;
488+
if (microseconds == min_nanoseconds / 1000 - 1) {
489+
// For values within one microsecond of min_nanoseconds, use it as base
490+
// and offset it with nanosecond delta to avoid overflow during scaling.
491+
PD_CHECK_OVERFLOW(checked_int64_add(
492+
min_nanoseconds, (dts->ps - _NS_MIN_DTS.ps) / 1000, &nanoseconds));
493+
} else {
494+
PD_CHECK_OVERFLOW(
495+
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
496+
PD_CHECK_OVERFLOW(
497+
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
498+
}
489499

490500
return nanoseconds;
491501
}

pandas/tests/tslibs/test_array_to_datetime.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,23 @@ def test_to_datetime_barely_out_of_bounds():
296296
tslib.array_to_datetime(arr)
297297

298298

299+
@pytest.mark.parametrize(
300+
"timestamp",
301+
[
302+
# Close enough to bounds that scaling micros to nanos overflows
303+
# but adding nanos would result in an in-bounds datetime.
304+
"1677-09-21T00:12:43.145224193",
305+
"1677-09-21T00:12:43.145224999",
306+
# this always worked
307+
"1677-09-21T00:12:43.145225000",
308+
],
309+
)
310+
def test_to_datetime_barely_inside_bounds(timestamp):
311+
# see gh-57150
312+
result, _ = tslib.array_to_datetime(np.array([timestamp], dtype=object))
313+
tm.assert_numpy_array_equal(result, np.array([timestamp], dtype="M8[ns]"))
314+
315+
299316
class SubDatetime(datetime):
300317
pass
301318

0 commit comments

Comments
 (0)