Skip to content

ENH: retain reso in Timestamp(dt64_obj) #49008

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ Other API changes
- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ cpdef array_to_datetime(
found_tz = True
if utc_convert:
_ts = convert_datetime_to_tsobject(val, None)
_ts.ensure_reso(NPY_FR_ns)
iresult[i] = _ts.value
elif found_naive:
raise ValueError('Tz-aware datetime.datetime '
Expand All @@ -527,6 +528,7 @@ cpdef array_to_datetime(
found_tz = True
tz_out = val.tzinfo
_ts = convert_datetime_to_tsobject(val, None)
_ts.ensure_reso(NPY_FR_ns)
iresult[i] = _ts.value

else:
Expand All @@ -535,7 +537,7 @@ cpdef array_to_datetime(
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
if isinstance(val, _Timestamp):
iresult[i] = val.value
iresult[i] = val._as_unit("ns").value
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ cdef class _TSObject:
int64_t value # numpy dt64
tzinfo tzinfo
bint fold
NPY_DATETIMEUNIT reso

cdef void ensure_reso(self, NPY_DATETIMEUNIT reso)


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand Down
17 changes: 14 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ import_datetime()
from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.dtypes cimport (
abbrev_to_npy_unit,
get_supported_reso,
periods_per_second,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
NPY_FR_ns,
check_dts_bounds,
convert_reso,
get_datetime64_unit,
get_datetime64_value,
get_implementation_bounds,
Expand Down Expand Up @@ -204,10 +206,16 @@ cdef class _TSObject:
# int64_t value # numpy dt64
# tzinfo tzinfo
# bint fold
# NPY_DATETIMEUNIT reso

def __cinit__(self):
# Give every new _TSObject well-defined defaults for the fields that
# callers do not always assign explicitly.
# GH 25057. As per PEP 495, set fold to 0 by default
self.fold = 0
# Treat ``value`` as nanoseconds unless a caller assigns a different reso.
self.reso = NPY_FR_ns # default value

cdef void ensure_reso(self, NPY_DATETIMEUNIT reso):
    """
    Re-express ``self.value`` in the requested resolution, in place.

    Parameters
    ----------
    reso : NPY_DATETIMEUNIT
        The resolution that ``self.value`` should be expressed in once
        this method returns.

    Notes
    -----
    This is a no-op when the object is already at ``reso``.  After a
    conversion ``self.reso`` is updated to match, so the (value, reso)
    pair stays consistent and calling this method again with the same
    target does not rescale the value a second time.
    """
    if self.reso != reso:
        # NOTE(review): the trailing ``False`` is presumably ``round_ok``
        # (i.e. lossy conversions are rejected) -- confirm against the
        # ``convert_reso`` declaration in np_datetime.
        self.value = convert_reso(self.value, self.reso, reso, False)
        # Record the unit the value is now expressed in; without this the
        # object would keep reporting its pre-conversion resolution.
        self.reso = reso


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
Expand All @@ -228,6 +236,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
"""
cdef:
_TSObject obj
NPY_DATETIMEUNIT reso

obj = _TSObject()

Expand All @@ -237,9 +246,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
if ts is None or ts is NaT:
obj.value = NPY_NAT
elif is_datetime64_object(ts):
obj.value = get_datetime64_nanos(ts, NPY_FR_ns)
reso = get_supported_reso(get_datetime64_unit(ts))
obj.reso = reso
obj.value = get_datetime64_nanos(ts, reso)
if obj.value != NPY_NAT:
pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts)
pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
elif is_integer_object(ts):
try:
ts = <int64_t>ts
Expand Down Expand Up @@ -295,7 +306,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
f'Timestamp')

maybe_localize_tso(obj, tz, NPY_FR_ns)
maybe_localize_tso(obj, tz, obj.reso)
return obj


Expand Down
63 changes: 13 additions & 50 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ from pandas._libs.tslibs.util cimport (
is_array,
is_datetime64_object,
is_integer_object,
is_timedelta64_object,
)

from pandas._libs.tslibs.fields import (
Expand Down Expand Up @@ -107,7 +106,6 @@ from pandas._libs.tslibs.offsets cimport (
from pandas._libs.tslibs.timedeltas cimport (
_Timedelta,
delta_to_nanoseconds,
ensure_td64ns,
is_any_td_scalar,
)

Expand Down Expand Up @@ -282,6 +280,7 @@ cdef class _Timestamp(ABCTimestamp):
)

obj.value = value
obj.reso = reso
pandas_datetime_to_datetimestruct(value, reso, &obj.dts)
maybe_localize_tso(obj, tz, reso)

Expand Down Expand Up @@ -432,62 +431,26 @@ cdef class _Timestamp(ABCTimestamp):
int64_t nanos = 0

if is_any_td_scalar(other):
if is_timedelta64_object(other):
other_reso = get_datetime64_unit(other)
if (
other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
# TODO: deprecate allowing this? We only get here
# with test_timedelta_add_timestamp_interval
other = np.timedelta64(other.view("i8"), "ns")
other_reso = NPY_DATETIMEUNIT.NPY_FR_ns
elif (
other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M
):
# TODO: deprecate allowing these? or handle more like the
# corresponding DateOffsets?
# TODO: no tests get here
other = ensure_td64ns(other)
other_reso = NPY_DATETIMEUNIT.NPY_FR_ns

if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns:
# TODO: no tests
other = ensure_td64ns(other)
if other_reso > self._reso:
# Following numpy, we cast to the higher resolution
# test_sub_timedelta64_mismatched_reso
self = (<_Timestamp>self)._as_reso(other_reso)


if isinstance(other, _Timedelta):
# TODO: share this with __sub__, Timedelta.__add__
# Matching numpy, we cast to the higher resolution. Unlike numpy,
# we raise instead of silently overflowing during this casting.
if self._reso < other._reso:
self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
elif self._reso > other._reso:
other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)
other = Timedelta(other)

try:
nanos = delta_to_nanoseconds(
other, reso=self._reso, round_ok=False
)
except OutOfBoundsTimedelta:
raise
# TODO: share this with __sub__, Timedelta.__add__
# Matching numpy, we cast to the higher resolution. Unlike numpy,
# we raise instead of silently overflowing during this casting.
if self._reso < other._reso:
self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
elif self._reso > other._reso:
other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)

try:
new_value = self.value + nanos
except OverflowError:
# Use Python ints
# Hit in test_tdi_add_overflow
new_value = int(self.value) + int(nanos)
nanos = other.value

try:
new_value = self.value + nanos
result = type(self)._from_value_and_reso(
new_value, reso=self._reso, tz=self.tzinfo
)
except OverflowError as err:
# TODO: don't hard-code nanosecond here
new_value = int(self.value) + int(nanos)
raise OutOfBoundsDatetime(
f"Out of bounds nanosecond timestamp: {new_value}"
) from err
Expand Down Expand Up @@ -1713,7 +1676,7 @@ class Timestamp(_Timestamp):
if not is_offset_object(freq):
freq = to_offset(freq)

return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold)
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.reso)

def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'):
cdef:
Expand Down
24 changes: 22 additions & 2 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3113,14 +3113,34 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
with pytest.raises(TypeError, match=msg):
constructor(scalar, dtype=dtype)

@pytest.mark.xfail(
reason="Timestamp constructor has been updated to cast dt64 to non-nano, "
"but DatetimeArray._from_sequence has not"
)
@pytest.mark.parametrize("cls", [datetime, np.datetime64])
def test_from_out_of_bounds_datetime(self, constructor, cls):
def test_from_out_of_ns_bounds_datetime(self, constructor, cls, request):
# scalar that won't fit in nanosecond dt64, but will fit in microsecond
scalar = datetime(9999, 1, 1)
exp_dtype = "M8[us]" # smallest reso that fits
if cls is np.datetime64:
scalar = np.datetime64(scalar, "D")
exp_dtype = "M8[s]" # closest reso to input
result = constructor(scalar)

assert type(get1(result)) is cls
item = get1(result)
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]

assert type(item) is Timestamp
assert item.asm8.dtype == exp_dtype
assert dtype == exp_dtype

def test_out_of_s_bounds_datetime64(self, constructor):
    # A day-resolution datetime64 holding the largest int64; per the test
    # name this is meant to fall outside what second resolution can hold.
    int64_max = np.iinfo(np.int64).max
    out_of_bounds = np.datetime64(int64_max, "D")
    container = constructor(out_of_bounds)

    # The scalar should come back untouched as a numpy datetime64 ...
    first = get1(container)
    assert type(first) is np.datetime64

    # ... and the container should hold it with object dtype.
    if isinstance(container, Series):
        found_dtype = container.dtype
    else:
        found_dtype = container.dtypes.iloc[0]
    assert found_dtype == object

@pytest.mark.xfail(
reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, "
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/scalar/timedelta/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def test_td_add_datetimelike_scalar(self, op):
assert result is NaT

def test_td_add_timestamp_overflow(self):
msg = "Cannot cast 259987 from D to 'ns' without overflow"
msg = "Cannot cast 259987 from D to 'ns' without overflow."
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")

msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
Timestamp("1700-01-01") + timedelta(days=13 * 19999)

Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/scalar/timestamp/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_overflow_offset_raises(self):
r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
"will overflow"
)
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"
lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow"

with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
stamp + offset_overflow
Expand All @@ -62,7 +62,9 @@ def test_overflow_offset_raises(self):
stamp = Timestamp("2000/1/1")
offset_overflow = to_offset("D") * 100**5

lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
lmsg3 = (
r"Cannot cast -?10000000000 days \+?00:00:00 to unit='ns' without overflow"
)
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
stamp + offset_overflow

Expand Down
32 changes: 29 additions & 3 deletions pandas/tests/scalar/timestamp/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pytest
import pytz

from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas.compat import PY310
from pandas.errors import OutOfBoundsDatetime

Expand Down Expand Up @@ -455,14 +456,26 @@ def test_out_of_bounds_value(self):
Timestamp(min_ts_us)
Timestamp(max_ts_us)

# We used to raise on these before supporting non-nano
us_val = NpyDatetimeUnit.NPY_FR_us.value
assert Timestamp(min_ts_us - one_us)._reso == us_val
assert Timestamp(max_ts_us + one_us)._reso == us_val

# https://github.com/numpy/numpy/issues/22346 for why
# we can't use the same construction as above with minute resolution

# too_low, too_high are _just_ outside the range of M8[s]
too_low = np.datetime64("-292277022657-01-27T08:29", "m")
too_high = np.datetime64("292277026596-12-04T15:31", "m")

msg = "Out of bounds"
# One us less than the minimum is an error
with pytest.raises(ValueError, match=msg):
Timestamp(min_ts_us - one_us)
Timestamp(too_low)

# One us more than the maximum is an error
with pytest.raises(ValueError, match=msg):
Timestamp(max_ts_us + one_us)
Timestamp(too_high)

def test_out_of_bounds_string(self):
msg = "Out of bounds"
Expand All @@ -487,7 +500,20 @@ def test_bounds_with_different_units(self):
for date_string in out_of_bounds_dates:
for unit in time_units:
dt64 = np.datetime64(date_string, unit)
msg = "Out of bounds"
ts = Timestamp(dt64)
if unit in ["s", "ms", "us"]:
# We can preserve the input unit
assert ts.value == dt64.view("i8")
else:
# we chose the closest unit that we _do_ support
assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value

# With more extreme cases, we can't even fit inside second resolution
info = np.iinfo(np.int64)
msg = "Out of bounds nanosecond timestamp:"
for value in [info.min + 1, info.max]:
for unit in ["D", "h", "m"]:
dt64 = np.datetime64(value, unit)
with pytest.raises(OutOfBoundsDatetime, match=msg):
Timestamp(dt64)

Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ def test_cmp_cross_reso(self):

# subtracting 3600*24 gives a datetime64 that _can_ fit inside the
# nanosecond implementation bounds.
other = Timestamp(dt64 - 3600 * 24)
other = Timestamp(dt64 - 3600 * 24)._as_unit("ns")
assert other < ts
assert other.asm8 > ts.asm8 # <- numpy gets this wrong
assert ts > other
Expand Down Expand Up @@ -884,12 +884,7 @@ def test_to_period(self, dt64, ts):
)
def test_addsub_timedeltalike_non_nano(self, dt64, ts, td):

if isinstance(td, Timedelta):
# td._reso is ns
exp_reso = td._reso
else:
# effective td._reso is s
exp_reso = ts._reso
exp_reso = max(ts._reso, Timedelta(td)._reso)

result = ts - td
expected = Timestamp(dt64) - td
Expand Down
Loading