pandas-dev · mroeschke · Oct 12, 2022 · Sep 28, 2022 · Sep 29, 2022 · Oct 8, 2022
diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
@@ -120,6 +120,7 @@ Other API changes
 - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
 - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
 - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
+- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -514,6 +514,7 @@ cpdef array_to_datetime(
                         found_tz = True
                         if utc_convert:
                             _ts = convert_datetime_to_tsobject(val, None)
+                            _ts.ensure_reso(NPY_FR_ns)
                             iresult[i] = _ts.value
                         elif found_naive:
                             raise ValueError('Tz-aware datetime.datetime '
@@ -527,6 +528,7 @@ cpdef array_to_datetime(
                             found_tz = True
                             tz_out = val.tzinfo
                             _ts = convert_datetime_to_tsobject(val, None)
+                            _ts.ensure_reso(NPY_FR_ns)
                             iresult[i] = _ts.value
 
                     else:
@@ -535,7 +537,7 @@ cpdef array_to_datetime(
                             raise ValueError('Cannot mix tz-aware with '
                                              'tz-naive values')
                         if isinstance(val, _Timestamp):
-                            iresult[i] = val.value
+                            iresult[i] = val._as_unit("ns").value
                         else:
                             iresult[i] = pydatetime_to_dt64(val, &dts)
                             check_dts_bounds(&dts)

diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
@@ -20,6 +20,9 @@ cdef class _TSObject:
         int64_t value               # numpy dt64
         tzinfo tzinfo
         bint fold
+        NPY_DATETIMEUNIT reso
+
+    cdef void ensure_reso(self, NPY_DATETIMEUNIT reso)
 
 
 cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,

diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
@@ -31,12 +31,14 @@ import_datetime()
 from pandas._libs.tslibs.base cimport ABCTimestamp
 from pandas._libs.tslibs.dtypes cimport (
     abbrev_to_npy_unit,
+    get_supported_reso,
     periods_per_second,
 )
 from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
     NPY_FR_ns,
     check_dts_bounds,
+    convert_reso,
     get_datetime64_unit,
     get_datetime64_value,
     get_implementation_bounds,
@@ -204,10 +206,16 @@ cdef class _TSObject:
     #    int64_t value               # numpy dt64
     #    tzinfo tzinfo
     #    bint fold
+    #    NPY_DATETIMEUNIT reso
 
     def __cinit__(self):
         # GH 25057. As per PEP 495, set fold to 0 by default
         self.fold = 0
+        self.reso = NPY_FR_ns  # default value
+
+    cdef void ensure_reso(self, NPY_DATETIMEUNIT reso):
+        if self.reso != reso:
+            self.value = convert_reso(self.value, self.reso, reso, False)
 
 
 cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
@@ -228,6 +236,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
     """
     cdef:
         _TSObject obj
+        NPY_DATETIMEUNIT reso
 
     obj = _TSObject()
 
@@ -237,9 +246,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
     if ts is None or ts is NaT:
         obj.value = NPY_NAT
     elif is_datetime64_object(ts):
-        obj.value = get_datetime64_nanos(ts, NPY_FR_ns)
+        reso = get_supported_reso(get_datetime64_unit(ts))
+        obj.reso = reso
+        obj.value = get_datetime64_nanos(ts, reso)
         if obj.value != NPY_NAT:
-            pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts)
+            pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
     elif is_integer_object(ts):
         try:
             ts = <int64_t>ts
@@ -295,7 +306,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
         raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
                         f'Timestamp')
 
-    maybe_localize_tso(obj, tz, NPY_FR_ns)
+    maybe_localize_tso(obj, tz, obj.reso)
     return obj
 
 

diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -65,7 +65,6 @@ from pandas._libs.tslibs.util cimport (
     is_array,
     is_datetime64_object,
     is_integer_object,
-    is_timedelta64_object,
 )
 
 from pandas._libs.tslibs.fields import (
@@ -107,7 +106,6 @@ from pandas._libs.tslibs.offsets cimport (
 from pandas._libs.tslibs.timedeltas cimport (
     _Timedelta,
     delta_to_nanoseconds,
-    ensure_td64ns,
     is_any_td_scalar,
 )
 
@@ -282,6 +280,7 @@ cdef class _Timestamp(ABCTimestamp):
             )
 
         obj.value = value
+        obj.reso = reso
         pandas_datetime_to_datetimestruct(value, reso, &obj.dts)
         maybe_localize_tso(obj, tz, reso)
 
@@ -432,62 +431,26 @@ cdef class _Timestamp(ABCTimestamp):
             int64_t nanos = 0
 
         if is_any_td_scalar(other):
-            if is_timedelta64_object(other):
-                other_reso = get_datetime64_unit(other)
-                if (
-                    other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
-                ):
-                    # TODO: deprecate allowing this?  We only get here
-                    #  with test_timedelta_add_timestamp_interval
-                    other = np.timedelta64(other.view("i8"), "ns")
-                    other_reso = NPY_DATETIMEUNIT.NPY_FR_ns
-                elif (
-                    other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M
-                ):
-                    # TODO: deprecate allowing these?  or handle more like the
-                    #  corresponding DateOffsets?
-                    # TODO: no tests get here
-                    other = ensure_td64ns(other)
-                    other_reso = NPY_DATETIMEUNIT.NPY_FR_ns
-
-                if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns:
-                    # TODO: no tests
-                    other = ensure_td64ns(other)
-                if other_reso > self._reso:
-                    # Following numpy, we cast to the higher resolution
-                    # test_sub_timedelta64_mismatched_reso
-                    self = (<_Timestamp>self)._as_reso(other_reso)
-
-
-            if isinstance(other, _Timedelta):
-                # TODO: share this with __sub__, Timedelta.__add__
-                # Matching numpy, we cast to the higher resolution. Unlike numpy,
-                #  we raise instead of silently overflowing during this casting.
-                if self._reso < other._reso:
-                    self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
-                elif self._reso > other._reso:
-                    other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)
+            other = Timedelta(other)
 
-            try:
-                nanos = delta_to_nanoseconds(
-                    other, reso=self._reso, round_ok=False
-                )
-            except OutOfBoundsTimedelta:
-                raise
+            # TODO: share this with __sub__, Timedelta.__add__
+            # Matching numpy, we cast to the higher resolution. Unlike numpy,
+            #  we raise instead of silently overflowing during this casting.
+            if self._reso < other._reso:
+                self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
+            elif self._reso > other._reso:
+                other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)
 
-            try:
-                new_value = self.value + nanos
-            except OverflowError:
-                # Use Python ints
-                # Hit in test_tdi_add_overflow
-                new_value = int(self.value) + int(nanos)
+            nanos = other.value
 
             try:
+                new_value = self.value + nanos
                 result = type(self)._from_value_and_reso(
                     new_value, reso=self._reso, tz=self.tzinfo
                 )
             except OverflowError as err:
                 # TODO: don't hard-code nanosecond here
+                new_value = int(self.value) + int(nanos)
                 raise OutOfBoundsDatetime(
                     f"Out of bounds nanosecond timestamp: {new_value}"
                 ) from err
@@ -1713,7 +1676,7 @@ class Timestamp(_Timestamp):
             if not is_offset_object(freq):
                 freq = to_offset(freq)
 
-        return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold)
+        return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.reso)
 
     def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'):
         cdef:

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -3113,14 +3113,34 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
         with pytest.raises(TypeError, match=msg):
             constructor(scalar, dtype=dtype)
 
+    @pytest.mark.xfail(
+        reason="Timestamp constructor has been updated to cast dt64 to non-nano, "
+        "but DatetimeArray._from_sequence has not"
+    )
     @pytest.mark.parametrize("cls", [datetime, np.datetime64])
-    def test_from_out_of_bounds_datetime(self, constructor, cls):
+    def test_from_out_of_ns_bounds_datetime(self, constructor, cls, request):
+        # scalar that won't fit in nanosecond dt64, but will fit in microsecond
         scalar = datetime(9999, 1, 1)
+        exp_dtype = "M8[us]"  # smallest reso that fits
         if cls is np.datetime64:
             scalar = np.datetime64(scalar, "D")
+            exp_dtype = "M8[s]"  # closest reso to input
         result = constructor(scalar)
 
-        assert type(get1(result)) is cls
+        item = get1(result)
+        dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
+
+        assert type(item) is Timestamp
+        assert item.asm8.dtype == exp_dtype
+        assert dtype == exp_dtype
+
+    def test_out_of_s_bounds_datetime64(self, constructor):
+        scalar = np.datetime64(np.iinfo(np.int64).max, "D")
+        result = constructor(scalar)
+        item = get1(result)
+        assert type(item) is np.datetime64
+        dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
+        assert dtype == object
 
     @pytest.mark.xfail(
         reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, "

diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -99,11 +99,11 @@ def test_td_add_datetimelike_scalar(self, op):
         assert result is NaT
 
     def test_td_add_timestamp_overflow(self):
-        msg = "Cannot cast 259987 from D to 'ns' without overflow"
+        msg = "Cannot cast 259987 from D to 'ns' without overflow."
         with pytest.raises(OutOfBoundsTimedelta, match=msg):
             Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")
 
-        msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
+        msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow"
         with pytest.raises(OutOfBoundsTimedelta, match=msg):
             Timestamp("1700-01-01") + timedelta(days=13 * 19999)
 

diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py
@@ -45,7 +45,7 @@ def test_overflow_offset_raises(self):
             r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
             "will overflow"
         )
-        lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"
+        lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow"
 
         with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
             stamp + offset_overflow
@@ -62,7 +62,9 @@ def test_overflow_offset_raises(self):
         stamp = Timestamp("2000/1/1")
         offset_overflow = to_offset("D") * 100**5
 
-        lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
+        lmsg3 = (
+            r"Cannot cast -?10000000000 days \+?00:00:00 to unit='ns' without overflow"
+        )
         with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
             stamp + offset_overflow
 

diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -11,6 +11,7 @@
 import pytest
 import pytz
 
+from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
 from pandas.compat import PY310
 from pandas.errors import OutOfBoundsDatetime
 
@@ -455,14 +456,26 @@ def test_out_of_bounds_value(self):
         Timestamp(min_ts_us)
         Timestamp(max_ts_us)
 
+        # We used to raise on these before supporting non-nano
+        us_val = NpyDatetimeUnit.NPY_FR_us.value
+        assert Timestamp(min_ts_us - one_us)._reso == us_val
+        assert Timestamp(max_ts_us + one_us)._reso == us_val
+
+        # https://github.com/numpy/numpy/issues/22346 for why
+        #  we can't use the same construction as above with minute resolution
+
+        # too_low, too_high are _just_ outside the range of M8[s]
+        too_low = np.datetime64("-292277022657-01-27T08:29", "m")
+        too_high = np.datetime64("292277026596-12-04T15:31", "m")
+
         msg = "Out of bounds"
         # One us less than the minimum is an error
         with pytest.raises(ValueError, match=msg):
-            Timestamp(min_ts_us - one_us)
+            Timestamp(too_low)
 
         # One us more than the maximum is an error
         with pytest.raises(ValueError, match=msg):
-            Timestamp(max_ts_us + one_us)
+            Timestamp(too_high)
 
     def test_out_of_bounds_string(self):
         msg = "Out of bounds"
@@ -487,7 +500,20 @@ def test_bounds_with_different_units(self):
         for date_string in out_of_bounds_dates:
             for unit in time_units:
                 dt64 = np.datetime64(date_string, unit)
-                msg = "Out of bounds"
+                ts = Timestamp(dt64)
+                if unit in ["s", "ms", "us"]:
+                    # We can preserve the input unit
+                    assert ts.value == dt64.view("i8")
+                else:
+                    # we chose the closest unit that we _do_ support
+                    assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value
+
+        # With more extreme cases, we can't even fit inside second resolution
+        info = np.iinfo(np.int64)
+        msg = "Out of bounds nanosecond timestamp:"
+        for value in [info.min + 1, info.max]:
+            for unit in ["D", "h", "m"]:
+                dt64 = np.datetime64(value, unit)
                 with pytest.raises(OutOfBoundsDatetime, match=msg):
                     Timestamp(dt64)
 

diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py
@@ -826,7 +826,7 @@ def test_cmp_cross_reso(self):
 
         # subtracting 3600*24 gives a datetime64 that _can_ fit inside the
         #  nanosecond implementation bounds.
-        other = Timestamp(dt64 - 3600 * 24)
+        other = Timestamp(dt64 - 3600 * 24)._as_unit("ns")
         assert other < ts
         assert other.asm8 > ts.asm8  # <- numpy gets this wrong
         assert ts > other
@@ -884,12 +884,7 @@ def test_to_period(self, dt64, ts):
     )
     def test_addsub_timedeltalike_non_nano(self, dt64, ts, td):
 
-        if isinstance(td, Timedelta):
-            # td._reso is ns
-            exp_reso = td._reso
-        else:
-            # effective td._reso is s
-            exp_reso = ts._reso
+        exp_reso = max(ts._reso, Timedelta(td)._reso)
 
         result = ts - td
         expected = Timestamp(dt64) - td