diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c3fbd3ee4853e..27d7c1464fa46 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -6,6 +6,7 @@ from typing import ( Literal, _GenericAlias, ) +import warnings cimport cython from cpython.datetime cimport ( @@ -99,6 +100,8 @@ cdef extern from "pandas/parser/pd_parser.h": PandasParser_IMPORT +from pandas._config import get_option + from pandas._libs cimport util from pandas._libs.util cimport ( INT64_MAX, @@ -1299,6 +1302,7 @@ cdef class Seen: bint datetimetz_ # seen_datetimetz bint period_ # seen_period bint interval_ # seen_interval + bint time_ def __cinit__(self, bint coerce_numeric=False): """ @@ -1325,6 +1329,7 @@ cdef class Seen: self.datetimetz_ = False self.period_ = False self.interval_ = False + self.time_ = False self.coerce_numeric = coerce_numeric cdef bint check_uint64_conflict(self) except -1: @@ -2615,6 +2620,12 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.object_ = True break + elif PyTime_Check(val): + if convert_non_numeric and val.tzinfo is None: + seen.time_ = True + else: + seen.object_ = True + break else: seen.object_ = True break @@ -2679,7 +2690,37 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True - elif seen.nat_: + elif seen.time_: + if is_time_array(objects): + # FIXME: need to ensure this is not timetz + opt = get_option("future.infer_time") + if opt is True: + import pyarrow as pa + + from pandas.core.dtypes.dtypes import ArrowDtype + + obj = pa.array(objects) + dtype = ArrowDtype(obj.type) + return dtype.construct_array_type()(obj) + elif opt is False: + # explicitly set to keep the old behavior and avoid the warning + pass + else: + from pandas.util._exceptions import find_stack_level + warnings.warn( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + + seen.object_ = True + + if seen.nat_: if not seen.object_ and not seen.numeric_ and not seen.bool_: # all NaT, None, or nan (at least one NaT) # see GH#49340 for discussion of desired behavior diff --git a/pandas/conftest.py b/pandas/conftest.py index 45fe755568d76..a67a381246b75 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -134,6 +134,9 @@ def pytest_collection_modifyitems(items, config) -> None: # Warnings from doctests that can be ignored; place reason in comment above. # Each entry specifies (path, message) - see the ignore_doctest_warning function ignored_doctest_warnings = [ + ("DatetimeProperties.time", "with pyarrow time dtype"), + ("DatetimeArray.time", "with pyarrow time dtype"), + ("DatetimeIndex.time", "with pyarrow time dtype"), ("is_int64_dtype", "is_int64_dtype is deprecated"), ("is_interval_dtype", "is_interval_dtype is deprecated"), ("is_period_dtype", "is_period_dtype is deprecated"), @@ -146,6 +149,8 @@ def pytest_collection_modifyitems(items, config) -> None: ("Series.idxmax", "The behavior of Series.idxmax"), ("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"), ("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"), + ("DatetimeArray.time", "with pyarrow time dtype"), + ("DatetimeIndex.time", "with pyarrow time dtype"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8ad51e4a90027..cb78e68e23b04 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -13,6 +13,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import ( lib, tslib, @@ -53,6 +55,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + ArrowDtype, DatetimeTZDtype, ExtensionDtype, PeriodDtype, @@ -82,7 +85,10 @@ ) from pandas import DataFrame - from pandas.core.arrays import PeriodArray + from pandas.core.arrays import ( + ArrowExtensionArray, + PeriodArray, + ) def tz_to_dtype( @@ -1341,7 +1347,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: return result @property - def time(self) -> npt.NDArray[np.object_]: + def time(self) -> npt.NDArray[np.object_] | ArrowExtensionArray: """ Returns numpy array of :class:`datetime.time` objects. @@ -1374,7 +1380,30 @@ def time(self) -> npt.NDArray[np.object_]: # keeping their timezone and not using UTC timestamps = self._local_timestamps() - return ints_to_pydatetime(timestamps, box="time", reso=self._creso) + result = ints_to_pydatetime(timestamps, box="time", reso=self._creso) + + opt = get_option("future.infer_time") + if opt is None: + warnings.warn( + f"The behavior of {type(self).__name__}.time is deprecated. " + "In a future version, this will return an array with pyarrow time " + "dtype instead of object dtype. To opt in to the future behavior, " + "set `pd.set_option('future.infer_time', True)`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif opt is True: + # TODO: optimize this to avoid going through ints_to_pydatetime + import pyarrow as pa + + pa_type = pa.time64(self.unit) + result[self.isna()] = None + obj = pa.array(result, type=pa_type) + dtype = ArrowDtype(obj.type) + out = dtype.construct_array_type()(obj) + return out + + return result @property def timetz(self) -> npt.NDArray[np.object_]: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 3f662073f0357..5f7d448a1092a 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -889,3 +889,14 @@ def register_converter_cb(key) -> None: styler_environment, validator=is_instance_factory([type(None), str]), ) + + +with cf.config_prefix("future"): + cf.register_option( + "future.infer_time", + None, + "Whether to infer sequence of datetime.time objects as pyarrow time " + "dtype, which will be the default in pandas 3.0 " + "(at which point this option will be deprecated).", + validator=is_one_of_factory([True, False, None]), + ) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 4ce6c35244e5b..8efbe7733a457 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -19,6 +19,8 @@ import numpy as np from numpy import ma +from pandas._config import get_option + from pandas._libs import lib from pandas._libs.tslibs import ( Period, @@ -49,7 +51,10 @@ is_object_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import NumpyEADtype +from pandas.core.dtypes.dtypes import ( + ArrowDtype, + NumpyEADtype, +) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCExtensionArray, @@ -362,6 +367,30 @@ def array( elif inferred_dtype == "boolean": return BooleanArray._from_sequence(data, copy=copy) + elif inferred_dtype == "time": + opt = get_option("future.infer_time") + + if opt is True: + import pyarrow as pa + + obj = pa.array(data) + dtype = ArrowDtype(obj.type) + return dtype.construct_array_type()(obj) + elif opt is False: + # explicitly set to keep the old behavior and avoid the warning + pass + else: + warnings.warn( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # Pandas overrides NumPy for # 1. datetime64[ns,us,ms,s] # 2. timedelta64[ns,us,ms,s] diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 09105bf49c050..95922caefb1cc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -18,6 +18,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import lib from pandas._libs.missing import ( NA, @@ -38,6 +40,7 @@ IntCastingNaNError, LossySetitemError, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int8, @@ -822,6 +825,28 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]: val = val.asm8 dtype = val.dtype + elif isinstance(val, dt.time): + if val.tzinfo is None: + # pyarrow doesn't have a dtype for timetz. + opt = get_option("future.infer_time") + if opt is None: + warnings.warn( + "Pandas type inference with a `datetime.time` " + "object is deprecated. In a future version, this will give " + "time32[pyarrow] dtype, which will require pyarrow to be " + "installed. To opt in to the new behavior immediately set " + "`pd.set_option('future.infer_time', True)`. To keep the " + "old behavior pass `dtype=object`.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif opt is True: + import pyarrow as pa + + pa_dtype = pa.time64("us") + + dtype = ArrowDtype(pa_dtype) + elif is_bool(val): dtype = np.dtype(np.bool_) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index c6da7d847c363..a8e9649906cc0 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -101,14 +101,14 @@ def _delegate_property_get(self, name: str): # type: ignore[override] elif not is_list_like(result): return result - result = np.asarray(result) - if self.orig is not None: index = self.orig.index else: index = self._parent.index # return the result as a Series - result = Series(result, index=index, name=self.name).__finalize__(self._parent) + result = Series( + result, index=index, name=self.name, dtype=result.dtype + ).__finalize__(self._parent) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ( diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index b608df1554154..74b4d16c2823f 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -33,7 +33,9 @@ def assert_cannot_add(left, right, msg="cannot add"): right + left -def assert_invalid_addsub_type(left, right, msg=None): +def assert_invalid_addsub_type( + left, right, msg=None, can_be_not_implemented: bool = False +): """ Helper to assert that left and right can be neither added nor subtracted. @@ -42,14 +44,23 @@ def assert_invalid_addsub_type(left, right, msg=None): left : object right : object msg : str or None, default None + can_be_not_implemented : bool, default False + Whether to accept NotImplementedError in addition to TypeError """ - with pytest.raises(TypeError, match=msg): + + errs = TypeError + if can_be_not_implemented: + # really we are interested in pa.lib.ArrowNotImplementedError, which + # is a subclass of NotImplementedError + errs = (TypeError, NotImplementedError) + + with pytest.raises(errs, match=msg): left + right - with pytest.raises(TypeError, match=msg): + with pytest.raises(errs, match=msg): right + left - with pytest.raises(TypeError, match=msg): + with pytest.raises(errs, match=msg): left - right - with pytest.raises(TypeError, match=msg): + with pytest.raises(errs, match=msg): right - left diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index e6c743c76a2c1..30743f133cfd7 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -20,6 +20,7 @@ from pandas._libs.tslibs.conversion import localize_pydatetime from pandas._libs.tslibs.offsets import shift_months from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -1166,31 +1167,52 @@ def test_dt64arr_add_sub_parr( ) assert_invalid_addsub_type(dtarr, parr, msg) - def test_dt64arr_addsub_time_objects_raises(self, box_with_array, tz_naive_fixture): + @pytest.mark.parametrize( + "future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] + ) + def test_dt64arr_addsub_time_objects_raises( + self, box_with_array, tz_naive_fixture, future, request + ): # https://github.com/pandas-dev/pandas/issues/10329 tz = tz_naive_fixture + if str(tz) == "tzlocal()" and future is True: + # TODO(GH#53278) + mark = pytest.mark.xfail( + reason="Incorrectly raises AttributeError instead of TypeError", + # some but not all CI builds + strict=False, + ) + request.node.add_marker(mark) obj1 = date_range("2012-01-01", periods=3, tz=tz) obj2 = [time(i, i, i) for i in range(3)] obj1 = tm.box_expected(obj1, box_with_array) - obj2 = tm.box_expected(obj2, box_with_array) - - msg = "|".join( - [ - "unsupported operand", - "cannot subtract DatetimeArray from ndarray", - ] - ) + msgs = [ + "unsupported operand", + "cannot subtract DatetimeArray from ndarray", + ] + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" + warn = None + if future is True: + msgs.append(r"Function '(add|subtract)_checked' has no kernel") + elif future is None: + warn = FutureWarning + + with pd.option_context("future.infer_time", future): + with tm.assert_produces_warning(warn, match=warn_msg): + obj2 = tm.box_expected(obj2, box_with_array) + + msg = "|".join(msgs) with warnings.catch_warnings(record=True): # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being # applied to Series or DatetimeIndex # we aren't testing that here, so ignore. warnings.simplefilter("ignore", PerformanceWarning) - assert_invalid_addsub_type(obj1, obj2, msg=msg) + assert_invalid_addsub_type(obj1, obj2, msg=msg, can_be_not_implemented=True) # ------------------------------------------------------------- # Other invalid operations diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1fe1d4efbefd7..64bb25c39fae9 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -155,8 +155,14 @@ def test_to_pydatetime(self, dta_dti): def test_time_date(self, dta_dti, meth): dta, dti = dta_dti - result = getattr(dta, meth) - expected = getattr(dti, meth) + warn = None + msg = "In a future version, this will return an array with pyarrow time dtype" + if meth == "time": + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=msg): + result = getattr(dta, meth) + expected = getattr(dti, meth) tm.assert_numpy_array_equal(result, expected) def test_format_native_types(self, unit, dtype, dta_dti): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 78f960f4d46d5..5ce564a9a3562 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1038,6 +1038,58 @@ def test_maybe_convert_objects_ea(self, idx): ) tm.assert_extension_array_equal(result, idx._data) + @pytest.mark.parametrize( + "future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] + ) + def test_maybe_convert_objects_time(self, future): + ts = Timestamp.now() + objs = np.array([ts.time()], dtype=object) + + msg = "Pandas type inference with a sequence of `datetime.time` objects" + warn = None + if future is True: + pa = pytest.importorskip("pyarrow") + dtype = pd.ArrowDtype(pa.time64("us")) + exp = dtype.construct_array_type()._from_sequence(objs, dtype=dtype) + else: + if future is None: + warn = FutureWarning + exp = objs + + with pd.option_context("future.infer_time", future): + with tm.assert_produces_warning(warn, match=msg): + out = lib.maybe_convert_objects(objs, convert_non_numeric=True) + with tm.assert_produces_warning(warn, match=msg): + ser = Series(objs) + with tm.assert_produces_warning(warn, match=msg): + ser2 = Series(list(objs)) + with tm.assert_produces_warning(warn, match=msg): + df = DataFrame(objs) + with tm.assert_produces_warning(warn, match=msg): + df2 = DataFrame(list(objs)) + with tm.assert_produces_warning(warn, match=msg): + idx = Index(objs) + with tm.assert_produces_warning(warn, match=msg): + idx2 = Index(list(objs)) + with tm.assert_produces_warning(warn, match=msg): + arr = pd.array(objs) + with tm.assert_produces_warning(warn, match=msg): + arr2 = pd.array(list(objs)) + + tm.assert_equal(out, exp) + if future: + tm.assert_equal(arr, exp) + tm.assert_equal(arr2, exp) + else: + tm.assert_equal(arr, pd.core.arrays.PandasArray(exp)) + tm.assert_equal(arr2, pd.core.arrays.PandasArray(exp)) + tm.assert_series_equal(ser, Series(exp, dtype=exp.dtype)) + tm.assert_series_equal(ser2, Series(exp, dtype=exp.dtype)) + tm.assert_frame_equal(df, DataFrame(exp, dtype=exp.dtype)) + tm.assert_frame_equal(df2, DataFrame(exp, dtype=exp.dtype)) + tm.assert_index_equal(idx, Index(exp, dtype=exp.dtype)) + tm.assert_index_equal(idx2, Index(exp, dtype=exp.dtype)) + class TestTypeInference: # Dummy class used for testing with Python objects diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 197cdc3f436a1..eb7524ede7be4 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -744,6 +744,28 @@ class TestBaseReshaping(base.BaseReshapingTests): def test_transpose(self, data): super().test_transpose(data) + @pytest.mark.parametrize( + "columns", + [ + ["A", "B"], + pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], names=["outer", "inner"] + ), + ], + ) + def test_stack(self, data, columns): + warn = None + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" + + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_time(pa_dtype): + # FIXME: need to avoid doing inference when calling frame._constructor + # in _stack_multi_columns + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): + super().test_stack(data, columns) + class TestBaseSetitem(base.BaseSetitemTests): @pytest.mark.xfail( @@ -806,6 +828,18 @@ def test_invert(self, data, request): class TestBaseMethods(base.BaseMethodsTests): + def test_hash_pandas_object_works(self, data, as_frame): + pa_dtype = data.dtype.pyarrow_dtype + warn_msg = "Pandas type inference with a sequence of `datetime.time`" + warn = None + if pa.types.is_time(pa_dtype): + # TODO(#48964) This warning will be avoided by implementing + # ArrowExtensionArray.hash_pandas_object + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg, check_stacklevel=False): + super().test_hash_pandas_object_works(data, as_frame) + @pytest.mark.parametrize("periods", [1, -2]) def test_diff(self, data, periods, request): pa_dtype = data.dtype.pyarrow_dtype diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 31f404258a9bb..413d137968af5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3168,6 +3168,40 @@ def test_tzaware_data_tznaive_dtype(self, constructor, box, frame_or_series): with pytest.raises(err, match=msg): constructor(ts, dtype="M8[ns]") + @pytest.mark.parametrize( + "future", [pytest.param(True, marks=td.skip_if_no("pyarrow")), False, None] + ) + def test_from_pytime(self, constructor, box, frame_or_series, future): + item = Timestamp("2023-05-04 08:53").time() + + warn = None + if box is list or (box is dict and frame_or_series is Series): + msg = ( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated" + ) + else: + msg = "Pandas type inference with a `datetime.time` object is deprecated" + exp_dtype = np.dtype(object) + if future is None: + warn = FutureWarning + elif future is True: + import pyarrow as pa + + pa_type = pa.time64("us") + exp_dtype = pd.ArrowDtype(pa_type) + + with pd.option_context("future.infer_time", future): + with tm.assert_produces_warning(warn, match=msg): + result = constructor(item) + dtype = tm.get_dtype(result) + assert dtype == exp_dtype + + aware = Timestamp("2023-05-04 08:53", tz="US/Pacific").timetz() + result2 = constructor(aware) + dtype = tm.get_dtype(result2) + assert dtype == np.dtype(object) + # TODO: better location for this test? class TestAllowNonNano: diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index f827eaf63a342..7efb5a20249a9 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -671,7 +671,14 @@ def test_datetime_method(method): def test_datetime_property(attr): s = pd.Series(pd.date_range("2000", periods=4)) s.attrs = {"a": 1} - result = getattr(s.dt, attr) + + warn = None + msg = "In a future version, this will return an array with pyarrow time dtype" + if attr == "time": + warn = FutureWarning + with tm.assert_produces_warning(warn, match=msg): + result = getattr(s.dt, attr) + assert result.attrs == {"a": 1} diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index a9912d75c8978..5af11f364578e 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1,6 +1,7 @@ from datetime import ( date, datetime, + time, ) from io import StringIO @@ -836,7 +837,16 @@ def test_apply_datetime_issue(group_column_dtlike): # is a datetime object and the column labels are different from # standard int values in range(len(num_columns)) - df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) + warn = None + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated" + ) + if isinstance(group_column_dtlike, time): + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg): + df = DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) result = df.groupby("a").apply(lambda x: Series(["spam"], index=[42])) expected = DataFrame( diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index f07a9dce5f6ae..e2bd5450d1f57 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -24,7 +24,9 @@ class TestDatetimeIndexOps: def test_dti_time(self): rng = date_range("1/1/2000", freq="12min", periods=10) - result = pd.Index(rng).time + msg = "In a future version, this will return an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pd.Index(rng).time expected = [t.time() for t in rng] assert (result == expected).all() diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 6f3c83b999e94..eda4e98da8fd0 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -853,7 +853,10 @@ def test_time_accessor(self, dtype): expected = np.array([time(10, 20, 30), pd.NaT]) index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], dtype=dtype) - result = index.time + + msg = "In a future version, this will return an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = index.time tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 7df5b928858d8..bb7a503d58ae7 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -986,13 +986,17 @@ def test_reader_seconds(self, request, engine, read_ext): time(16, 37, 0, 900000), time(18, 20, 54), ] - } + }, + dtype=object, ) - actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1") + warn_msg = "Pandas type inference with a sequence of `datetime.time` objects" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) - actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1") tm.assert_frame_equal(actual, expected) def test_read_excel_multiindex(self, request, read_ext): diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index eecacf29de872..d228929eb9ccd 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -479,7 +479,9 @@ def test_date_col_as_index_col(all_parsers): if parser.engine == "pyarrow": # https://github.com/pandas-dev/pandas/issues/44231 # pyarrow 6.0 starts to infer time type - expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + msg = "In a future version, this will return an array with pyarrow time dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 6800e55396d7b..5ccc0f7b7ccbf 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2313,9 +2313,14 @@ def test_datetime_date(self): def test_datetime_time(self, sqlite_buildin): # test support for datetime.time - df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) + + warn_msg = "Pandas type inference with a sequence of `datetime.time`" + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) assert df.to_sql("test_time", self.conn, index=False) == 2 - res = read_sql_table("test_time", self.conn) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) # GH8341 @@ -2331,7 +2336,9 @@ def test_datetime_time(self, sqlite_buildin): res = sql.read_sql_query("SELECT * FROM test_time3", self.conn) ref = df.map(lambda _: _.strftime("%H:%M:%S.%f")) tm.assert_frame_equal(ref, res) - res = sql.read_sql_table("test_time3", self.conn) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = sql.read_sql_table("test_time3", self.conn) tm.assert_frame_equal(df, res) def test_mixed_dtype_insert(self): @@ -3168,13 +3175,19 @@ def test_datetime_date(self): @pytest.mark.parametrize("tz_aware", [False, True]) def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 + if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] else: tz_dt = date_range("2013-01-01 09:00:00", periods=2, tz="US/Pacific") tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz()) - df = DataFrame(tz_times, columns=["a"]) + warn_msg = "Pandas type inference with a sequence of `datetime.time`" + warn = None + if not tz_aware: + warn = FutureWarning + with tm.assert_produces_warning(warn, match=warn_msg): + df = DataFrame(tz_times, columns=["a"]) assert df.to_sql("test_time", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 5cbe7b6e30c84..6923221632ced 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -753,11 +753,25 @@ def test_plot_scatter_shape(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter df = DataFrame(np.random.randn(10), columns=["a"]) - df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` " + "objects is deprecated" + ) + + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time + msg = "must be a string or a (real )?number, not 'datetime.time'" with pytest.raises(TypeError, match=msg): - df.plot(kind="scatter", x="dtime", y="a") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + # warns bc it calls infer_objects inside df.plot + df.plot(kind="scatter", x="dtime", y="a") + + with pd.option_context("future.infer_time", True): + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(None): + df.plot(kind="scatter", x="dtime", y="a") @pytest.mark.parametrize("x, y", [("dates", "vals"), (0, 1)]) def test_scatterplot_datetime_data(self, x, y): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 4b121841b4e4d..a1f9224d5fce9 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1053,6 +1053,7 @@ def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + ts = Index(ts, dtype=object) df = DataFrame( {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts ) @@ -1076,7 +1077,10 @@ def test_time(self): def test_time_change_xlim(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() - ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + ts = Index( + np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]), + dtype=object, + ) df = DataFrame( {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts ) @@ -1118,6 +1122,7 @@ def test_time_musec(self): t = datetime(1, 1, 1, 3, 30, 0) deltas = np.random.randint(1, 20, 3).cumsum() ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) + ts = Index(ts, dtype=object) df = DataFrame( {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts ) diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index 4cb3624309916..994c81e52d172 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -211,8 +211,12 @@ def test_dt_accessor_api_for_categorical(self, idx): tm.assert_equal(res, exp) for attr in attr_names: - res = getattr(cat.dt, attr) - exp = getattr(ser.dt, attr) + with warnings.catch_warnings(): + if attr == "time": + # deprecated to return pyarrow time dtype + warnings.simplefilter("ignore", FutureWarning) + res = getattr(cat.dt, attr) + exp = getattr(ser.dt, attr) tm.assert_equal(res, exp) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 5cdeee20f3435..b24b362e11d06 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -12,6 +12,7 @@ import pytz from pandas._libs.tslibs.timezones import maybe_get_tz +from pandas.compat import pa_version_under7p0 from pandas.errors import SettingWithCopyError from pandas.core.dtypes.common import ( @@ -87,10 +88,19 @@ def get_expected(ser, prop): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, DataFrame): return result - return Series(result, index=ser.index, name=ser.name) + return Series(result, index=ser.index, name=ser.name, dtype=result.dtype) + + if name == "time": + msg = ( + "In a future version, this will return an array with pyarrow time dtype" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + left = getattr(ser.dt, name) + right = get_expected(ser, name) + else: + left = getattr(ser.dt, name) + right = get_expected(ser, name) - left = getattr(ser.dt, name) - right = get_expected(ser, name) if not (is_list_like(left) and is_list_like(right)): assert left == right elif isinstance(left, DataFrame): @@ -672,10 +682,31 @@ def test_valid_dt_with_missing_values(self): ) tm.assert_series_equal(result, expected) - result = ser.dt.time + msg = "In a future version, this will return an array with pyarrow time" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.time expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object") tm.assert_series_equal(result, expected) + with pd.option_context("future.infer_time", False): + with tm.assert_produces_warning(None): + result = ser.dt.time + tm.assert_series_equal(result, expected) + + if pa_version_under7p0: + return + + with pd.option_context("future.infer_time", True): + with tm.assert_produces_warning(None): + result_pa = ser.dt.time + + import pyarrow as pa + + pa_dtype = pa.time64("ns") + dtype = pd.ArrowDtype(pa_dtype) + expected_pa = expected.astype(dtype) + tm.assert_series_equal(result_pa, expected_pa) + def test_dt_accessor_api(self): # GH 9322 from pandas.core.indexes.accessors import ( @@ -725,7 +756,8 @@ def test_dt_timetz_accessor(self, tz_naive_fixture): ) ser = Series(dtindex) expected = Series( - [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)] + [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)], + dtype=object, ) result = ser.dt.timetz tm.assert_series_equal(result, expected) diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index c439a5f006922..c3b77569cc751 100644 --- a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -31,7 +31,16 @@ def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype): box = index_or_series inferred_dtype, values = any_skipna_inferred_dtype - t = box(values, dtype=dtype) # explicit dtype to avoid casting + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` objects " + "is deprecated" + ) + warn = None + if dtype == "category" and inferred_dtype == "time": + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg): + t = box(values, dtype=dtype) # explicit dtype to avoid casting types_passing_constructor = [ "string", diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index 5046fd9d0edc1..eb987d8a63b39 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -61,9 +61,15 @@ def test_arraylike(self): with pytest.raises(ValueError, match=msg): to_time(arg, format="%I:%M%p", errors="raise") - tm.assert_series_equal( - to_time(Series(arg, name="test")), Series(expected_arr, name="test") + warn_msg = ( + "Pandas type inference with a sequence of `datetime.time` objects " + "is deprecated" ) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res_ser = to_time(Series(arg, name="test")) + exp_ser = Series(expected_arr, name="test", dtype=object) + + tm.assert_series_equal(res_ser, exp_ser) res = to_time(np.array(arg)) assert isinstance(res, list)