Skip to content

POC/DEPR: infer time objects to ArrowDtype[time] #53025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
43 changes: 42 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ from typing import (
Literal,
_GenericAlias,
)
import warnings

cimport cython
from cpython.datetime cimport (
@@ -99,6 +100,8 @@ cdef extern from "pandas/parser/pd_parser.h":

PandasParser_IMPORT

from pandas._config import get_option

from pandas._libs cimport util
from pandas._libs.util cimport (
INT64_MAX,
@@ -1299,6 +1302,7 @@ cdef class Seen:
bint datetimetz_ # seen_datetimetz
bint period_ # seen_period
bint interval_ # seen_interval
bint time_

def __cinit__(self, bint coerce_numeric=False):
"""
@@ -1325,6 +1329,7 @@ cdef class Seen:
self.datetimetz_ = False
self.period_ = False
self.interval_ = False
self.time_ = False
self.coerce_numeric = coerce_numeric

cdef bint check_uint64_conflict(self) except -1:
@@ -2615,6 +2620,12 @@ def maybe_convert_objects(ndarray[object] objects,
else:
seen.object_ = True
break
elif PyTime_Check(val):
if convert_non_numeric and val.tzinfo is None:
seen.time_ = True
else:
seen.object_ = True
break
else:
seen.object_ = True
break
@@ -2679,7 +2690,37 @@ def maybe_convert_objects(ndarray[object] objects,

seen.object_ = True

elif seen.nat_:
elif seen.time_:
if is_time_array(objects):
# FIXME: need to ensure this is not timetz
opt = get_option("future.infer_time")
if opt is True:
import pyarrow as pa

from pandas.core.dtypes.dtypes import ArrowDtype

obj = pa.array(objects)
dtype = ArrowDtype(obj.type)
return dtype.construct_array_type()(obj)
elif opt is False:
# explicitly set to keep the old behavior and avoid the warning
pass
else:
from pandas.util._exceptions import find_stack_level
warnings.warn(
"Pandas type inference with a sequence of `datetime.time` "
"objects is deprecated. In a future version, this will give "
"time32[pyarrow] dtype, which will require pyarrow to be "
"installed. To opt in to the new behavior immediately set "
"`pd.set_option('future.infer_time', True)`. To keep the "
"old behavior pass `dtype=object`.",
FutureWarning,
stacklevel=find_stack_level(),
)

seen.object_ = True

if seen.nat_:
if not seen.object_ and not seen.numeric_ and not seen.bool_:
# all NaT, None, or nan (at least one NaT)
# see GH#49340 for discussion of desired behavior
5 changes: 5 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
@@ -134,6 +134,9 @@ def pytest_collection_modifyitems(items, config) -> None:
# Warnings from doctests that can be ignored; place reason in comment above.
# Each entry specifies (path, message) - see the ignore_doctest_warning function
ignored_doctest_warnings = [
("DatetimeProperties.time", "with pyarrow time dtype"),
("DatetimeArray.time", "with pyarrow time dtype"),
("DatetimeIndex.time", "with pyarrow time dtype"),
("is_int64_dtype", "is_int64_dtype is deprecated"),
("is_interval_dtype", "is_interval_dtype is deprecated"),
("is_period_dtype", "is_period_dtype is deprecated"),
@@ -146,6 +149,8 @@ def pytest_collection_modifyitems(items, config) -> None:
("Series.idxmax", "The behavior of Series.idxmax"),
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
("DatetimeArray.time", "with pyarrow time dtype"),
("DatetimeIndex.time", "with pyarrow time dtype"),
# Docstring divides by zero to show behavior difference
("missing.mask_zero_div_zero", "divide by zero encountered"),
(
35 changes: 32 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,8 @@

import numpy as np

from pandas._config import get_option

from pandas._libs import (
lib,
tslib,
@@ -53,6 +55,7 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
ArrowDtype,
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
@@ -82,7 +85,10 @@
)

from pandas import DataFrame
from pandas.core.arrays import PeriodArray
from pandas.core.arrays import (
ArrowExtensionArray,
PeriodArray,
)


def tz_to_dtype(
@@ -1341,7 +1347,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]:
return result

@property
def time(self) -> npt.NDArray[np.object_]:
def time(self) -> npt.NDArray[np.object_] | ArrowExtensionArray:
"""
Returns numpy array of :class:`datetime.time` objects.

@@ -1374,7 +1380,30 @@ def time(self) -> npt.NDArray[np.object_]:
# keeping their timezone and not using UTC
timestamps = self._local_timestamps()

return ints_to_pydatetime(timestamps, box="time", reso=self._creso)
result = ints_to_pydatetime(timestamps, box="time", reso=self._creso)

opt = get_option("future.infer_time")
if opt is None:
warnings.warn(
f"The behavior of {type(self).__name__}.time is deprecated. "
"In a future version, this will return an array with pyarrow time "
"dtype instead of object dtype. To opt in to the future behavior, "
"set `pd.set_option('future.infer_time', True)`.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif opt is True:
# TODO: optimize this to avoid going through ints_to_pydatetime
import pyarrow as pa

pa_type = pa.time64(self.unit)
result[self.isna()] = None
obj = pa.array(result, type=pa_type)
dtype = ArrowDtype(obj.type)
out = dtype.construct_array_type()(obj)
return out

return result

@property
def timetz(self) -> npt.NDArray[np.object_]:
11 changes: 11 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
@@ -889,3 +889,14 @@ def register_converter_cb(key) -> None:
styler_environment,
validator=is_instance_factory([type(None), str]),
)


# Options gating behavior that is planned to become the default in a future
# release.
# NOTE: ``cf.config_prefix("future")`` prepends "future." to every key
# registered inside the block, so the key is given WITHOUT that prefix here.
# The option is then registered under exactly "future.infer_time", the key
# that callers read with ``get_option("future.infer_time")``.
with cf.config_prefix("future"):
    cf.register_option(
        "infer_time",
        # Default None: keep the legacy object-dtype behavior but warn.
        # False keeps the legacy behavior silently; True opts in to the
        # pyarrow time dtype.
        None,
        "Whether to infer sequence of datetime.time objects as pyarrow time "
        "dtype, which will be the default in pandas 3.0 "
        "(at which point this option will be deprecated).",
        validator=is_one_of_factory([True, False, None]),
    )
31 changes: 30 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,8 @@
import numpy as np
from numpy import ma

from pandas._config import get_option

from pandas._libs import lib
from pandas._libs.tslibs import (
Period,
@@ -49,7 +51,10 @@
is_object_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.dtypes import (
ArrowDtype,
NumpyEADtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCExtensionArray,
@@ -362,6 +367,30 @@ def array(
elif inferred_dtype == "boolean":
return BooleanArray._from_sequence(data, copy=copy)

elif inferred_dtype == "time":
opt = get_option("future.infer_time")

if opt is True:
import pyarrow as pa

obj = pa.array(data)
dtype = ArrowDtype(obj.type)
return dtype.construct_array_type()(obj)
elif opt is False:
# explicitly set to keep the old behavior and avoid the warning
pass
else:
warnings.warn(
"Pandas type inference with a sequence of `datetime.time` "
"objects is deprecated. In a future version, this will give "
"time32[pyarrow] dtype, which will require pyarrow to be "
"installed. To opt in to the new behavior immediately set "
"`pd.set_option('future.infer_time', True)`. To keep the "
"old behavior pass `dtype=object`.",
FutureWarning,
stacklevel=find_stack_level(),
)

# Pandas overrides NumPy for
# 1. datetime64[ns,us,ms,s]
# 2. timedelta64[ns,us,ms,s]
25 changes: 25 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,8 @@

import numpy as np

from pandas._config import get_option

from pandas._libs import lib
from pandas._libs.missing import (
NA,
@@ -38,6 +40,7 @@
IntCastingNaNError,
LossySetitemError,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
ensure_int8,
@@ -822,6 +825,28 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
val = val.asm8
dtype = val.dtype

elif isinstance(val, dt.time):
if val.tzinfo is None:
# pyarrow doesn't have a dtype for timetz.
opt = get_option("future.infer_time")
if opt is None:
warnings.warn(
"Pandas type inference with a `datetime.time` "
"object is deprecated. In a future version, this will give "
"time32[pyarrow] dtype, which will require pyarrow to be "
"installed. To opt in to the new behavior immediately set "
"`pd.set_option('future.infer_time', True)`. To keep the "
"old behavior pass `dtype=object`.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif opt is True:
import pyarrow as pa

pa_dtype = pa.time64("us")

dtype = ArrowDtype(pa_dtype)

elif is_bool(val):
dtype = np.dtype(np.bool_)

6 changes: 3 additions & 3 deletions pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
@@ -101,14 +101,14 @@ def _delegate_property_get(self, name: str): # type: ignore[override]
elif not is_list_like(result):
return result

result = np.asarray(result)

if self.orig is not None:
index = self.orig.index
else:
index = self._parent.index
# return the result as a Series
result = Series(result, index=index, name=self.name).__finalize__(self._parent)
result = Series(
result, index=index, name=self.name, dtype=result.dtype
).__finalize__(self._parent)

# setting this object will show a SettingWithCopyWarning/Error
result._is_copy = (
21 changes: 16 additions & 5 deletions pandas/tests/arithmetic/common.py
Original file line number Diff line number Diff line change
@@ -33,7 +33,9 @@ def assert_cannot_add(left, right, msg="cannot add"):
right + left


def assert_invalid_addsub_type(left, right, msg=None):
def assert_invalid_addsub_type(
left, right, msg=None, can_be_not_implemented: bool = False
):
"""
Helper to assert that left and right can be neither added nor subtracted.

@@ -42,14 +44,23 @@ def assert_invalid_addsub_type(left, right, msg=None):
left : object
right : object
msg : str or None, default None
can_be_not_implemented : bool, default False
Whether to accept NotImplementedError in addition to TypeError
"""
with pytest.raises(TypeError, match=msg):

errs = TypeError
if can_be_not_implemented:
# really we are interested in pa.lib.ArrowNotImplementedError, which
# is a subclass of NotImplementedError
errs = (TypeError, NotImplementedError)

with pytest.raises(errs, match=msg):
left + right
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
right + left
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
left - right
with pytest.raises(TypeError, match=msg):
with pytest.raises(errs, match=msg):
right - left


Loading