diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 7b5c77af7864b..53c2cfd345e32 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -135,16 +135,15 @@ def astype_nansafe( elif is_object_dtype(arr.dtype): # if we have a datetime/timedelta array of objects - # then coerce to a proper dtype and recall astype_nansafe + # then coerce to datetime64[ns] and use DatetimeArray.astype if is_datetime64_dtype(dtype): from pandas import to_datetime - return astype_nansafe( - to_datetime(arr.ravel()).values.reshape(arr.shape), - dtype, - copy=copy, - ) + dti = to_datetime(arr.ravel()) + dta = dti._data.reshape(arr.shape) + return dta.astype(dtype, copy=False)._ndarray + elif is_timedelta64_dtype(dtype): # bc we know arr.dtype == object, this is equivalent to # `np.asarray(to_timedelta(arr))`, but using a lower-level API that diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c8a3c992248ad..472ae80dc1838 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -757,7 +757,13 @@ def test_astype_datetime64_bad_dtype_raises(from_type, to_type): to_type = np.dtype(to_type) - with pytest.raises(TypeError, match="cannot astype"): + msg = "|".join( + [ + "cannot astype a timedelta", + "cannot astype a datetimelike", + ] + ) + with pytest.raises(TypeError, match=msg): astype_nansafe(arr, dtype=to_type) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 96ef49acdcb21..9d56dba9b480d 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -377,6 +377,16 @@ def test_astype_column_metadata(self, dtype): df = df.astype(dtype) tm.assert_index_equal(df.columns, columns) + @pytest.mark.parametrize("unit", ["Y", "M", "W", "D", "h", "m"]) + def test_astype_from_object_to_datetime_unit(self, unit): + vals = [ + ["2015-01-01", "2015-01-02", "2015-01-03"], + ["2017-01-01", "2017-01-02", "2017-02-03"], + ] + df = DataFrame(vals, dtype=object) + with pytest.raises(TypeError, match="Cannot cast"): + df.astype(f"M8[{unit}]") + @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_from_datetimelike_to_object(self, dtype, unit): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 32a4dc06d08e2..ef80cc847a5b8 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1955,19 +1955,11 @@ def test_constructor_datetimes_with_nulls(self, arr): @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) @pytest.mark.parametrize( - "dtype", - [ - "datetime64[M]", - "datetime64[D]", - "datetime64[h]", - "datetime64[m]", - "datetime64[s]", - "datetime64[ms]", - "datetime64[us]", - "datetime64[ns]", - ], + "unit", + ["M", "D", "h", "m", "s", "ms", "us", "ns"], ) - def test_constructor_datetimes_non_ns(self, order, dtype): + def test_constructor_datetimes_non_ns(self, order, unit): + dtype = f"datetime64[{unit}]" na = np.array( [ ["2015-01-01", "2015-01-02", "2015-01-03"], @@ -1977,13 +1969,16 @@ def test_constructor_datetimes_non_ns(self, order, dtype): order=order, ) df = DataFrame(na) - expected = DataFrame( - [ - ["2015-01-01", "2015-01-02", "2015-01-03"], - ["2017-01-01", "2017-01-02", "2017-02-03"], - ] - ) - expected = expected.astype(dtype=dtype) + expected = DataFrame(na.astype("M8[ns]")) + if unit in ["M", "D", "h", "m"]: + with pytest.raises(TypeError, match="Cannot cast"): + expected.astype(dtype) + + # instead the constructor casts to the closest supported reso, i.e. "s" + expected = expected.astype("datetime64[s]") + else: + expected = expected.astype(dtype=dtype) + tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("order", ["K", "A", "C", "F"]) diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index 5629830767c3c..412c8a8dde175 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -128,14 +128,14 @@ def test_dtypes_with_names(parser): df_result = read_xml( xml_dates, names=["Col1", "Col2", "Col3", "Col4"], - dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64"}, + dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"}, parser=parser, ) df_iter = read_xml_iterparse( xml_dates, parser=parser, names=["Col1", "Col2", "Col3", "Col4"], - dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64"}, + dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"}, iterparse={"row": ["shape", "degrees", "sides", "date"]}, ) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 946e7e48148b4..ab589dc26a3ac 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -730,13 +730,13 @@ def test_other_datetime_unit(self, unit): ser = Series([None, None], index=[101, 102], name="days") dtype = f"datetime64[{unit}]" - df2 = ser.astype(dtype).to_frame("days") if unit in ["D", "h", "m"]: # not supported so we cast to the nearest supported unit, seconds exp_dtype = "datetime64[s]" else: exp_dtype = dtype + df2 = ser.astype(exp_dtype).to_frame("days") assert df2["days"].dtype == exp_dtype result = df1.merge(df2, left_on="entity_id", right_index=True)