Skip to content

DEPR: Deprecate box kwarg for to_timedelta and to_datetime #24486

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 13, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
@@ -94,6 +94,7 @@ Deprecations
~~~~~~~~~~~~

- Deprecated the `M (months)` and `Y (year)` `units` parameter of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64`/:meth:`Timedelta.to_timedelta64`. (:issue:`24416`)

.. _whatsnew_0250.prior_deprecations:

4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
@@ -794,10 +794,10 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
# Immediate return if coerce
if datetime:
from pandas import to_datetime
return to_datetime(values, errors='coerce', box=False)
return to_datetime(values, errors='coerce').to_numpy()
elif timedelta:
from pandas import to_timedelta
return to_timedelta(values, errors='coerce', box=False)
return to_timedelta(values, errors='coerce').to_numpy()
elif numeric:
from pandas import to_numeric
return to_numeric(values, errors='coerce')
3 changes: 2 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -300,7 +300,8 @@ def asobject(self):
return self.astype(object)

def _convert_tolerance(self, tolerance, target):
tolerance = np.asarray(to_timedelta(tolerance, box=False))
tolerance = np.asarray(to_timedelta(tolerance).to_numpy())

if target.size != tolerance.size and tolerance.size > 1:
raise ValueError('list-like tolerance size must match '
'target index size')
8 changes: 8 additions & 0 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
DateParseError, _format_is_iso, _guess_datetime_format, parse_time_string)
from pandas._libs.tslibs.strptime import array_strptime
from pandas.compat import zip
from pandas.util._decorators import deprecate_kwarg

from pandas.core.dtypes.common import (
ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype,
@@ -398,6 +399,7 @@ def _adjust_to_origin(arg, origin, unit):
return arg


@deprecate_kwarg(old_arg_name='box', new_arg_name=None)
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
utc=None, box=True, format=None, exact=True,
unit=None, infer_datetime_format=False, origin='unix',
@@ -444,6 +446,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

- If True returns a DatetimeIndex or Index-like object
- If False returns ndarray of values.

.. deprecated:: 0.25.0
Use :meth:`.to_numpy` or :meth:`Timestamp.to_datetime64`
instead to get an ndarray of values or numpy.datetime64,
respectively.

format : string, default None
strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
all the way up to nanoseconds.
8 changes: 8 additions & 0 deletions pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
@@ -8,13 +8,15 @@

from pandas._libs.tslibs import NaT
from pandas._libs.tslibs.timedeltas import Timedelta, parse_timedelta_unit
from pandas.util._decorators import deprecate_kwarg

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries

from pandas.core.arrays.timedeltas import sequence_to_td64ns


@deprecate_kwarg(old_arg_name='box', new_arg_name=None)
def to_timedelta(arg, unit='ns', box=True, errors='raise'):
"""
Convert argument to timedelta.
@@ -40,6 +42,12 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'):
- If True returns a Timedelta/TimedeltaIndex of the results.
- If False returns a numpy.timedelta64 or numpy.ndarray of
values of dtype timedelta64[ns].

.. deprecated:: 0.25.0
Use :meth:`.to_numpy` or :meth:`Timedelta.to_timedelta64`
instead to get an ndarray of values or numpy.timedelta64,
respectively.

errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaT.
4 changes: 2 additions & 2 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
@@ -3164,11 +3164,11 @@ def converter(*date_cols):
return tools.to_datetime(
ensure_object(strs),
utc=None,
box=False,
dayfirst=dayfirst,
errors='ignore',
infer_datetime_format=infer_datetime_format
)
).to_numpy()

except ValueError:
return tools.to_datetime(
parsing.try_parse_dates(strs, dayfirst=dayfirst))
95 changes: 49 additions & 46 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
@@ -184,9 +184,6 @@ def test_to_datetime_format_weeks(self, cache):
for s, format, dt in data:
assert to_datetime(s, format=format, cache=cache) == dt

@pytest.mark.parametrize("box,const", [
[True, pd.Index],
[False, np.array]])
@pytest.mark.parametrize("fmt,dates,expected_dates", [
['%Y-%m-%d %H:%M:%S %Z',
['2010-01-01 12:00:00 UTC'] * 2,
@@ -218,15 +215,15 @@ def test_to_datetime_format_weeks(self, cache):
tzinfo=pytz.FixedOffset(0)), # pytz coerces to UTC
pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(0))]]])
def test_to_datetime_parse_tzname_or_tzoffset(self, box, const,
fmt, dates, expected_dates):
def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates,
expected_dates):
# GH 13486
result = pd.to_datetime(dates, format=fmt, box=box)
expected = const(expected_dates)
result = pd.to_datetime(dates, format=fmt)
expected = pd.Index(expected_dates)
tm.assert_equal(result, expected)

with pytest.raises(ValueError):
pd.to_datetime(dates, format=fmt, box=box, utc=True)
pd.to_datetime(dates, format=fmt, utc=True)

@pytest.mark.parametrize('offset', [
'+0', '-1foo', 'UTCbar', ':10', '+01:000:01', ''])
@@ -256,7 +253,7 @@ def test_to_datetime_dtarr(self, tz):
result = to_datetime(arr)
assert result is arr

result = to_datetime(arr, box=True)
result = to_datetime(arr)
assert result is arr

def test_to_datetime_pydatetime(self):
@@ -363,9 +360,9 @@ def test_to_datetime_array_of_dt64s(self, cache):

# Assuming all datetimes are in bounds, to_datetime() returns
# an array that is equal to Timestamp() parsing
tm.assert_numpy_array_equal(
pd.to_datetime(dts, box=False, cache=cache),
np.array([Timestamp(x).asm8 for x in dts])
tm.assert_index_equal(
pd.to_datetime(dts, cache=cache),
pd.DatetimeIndex([Timestamp(x).asm8 for x in dts])
)

# A list of datetimes where the last one is out of bounds
@@ -375,28 +372,26 @@ def test_to_datetime_array_of_dt64s(self, cache):
with pytest.raises(OutOfBoundsDatetime, match=msg):
pd.to_datetime(dts_with_oob, errors='raise')

tm.assert_numpy_array_equal(
pd.to_datetime(dts_with_oob, box=False, errors='coerce',
tm.assert_index_equal(
pd.to_datetime(dts_with_oob, errors='coerce',
cache=cache),
np.array(
pd.DatetimeIndex(
[
Timestamp(dts_with_oob[0]).asm8,
Timestamp(dts_with_oob[1]).asm8,
tslib.iNaT,
],
dtype='M8'
pd.NaT
]
)
)

# With errors='ignore', out of bounds datetime64s
# are converted to their .item(), which depending on the version of
# numpy is either a python datetime.datetime or datetime.date
tm.assert_numpy_array_equal(
pd.to_datetime(dts_with_oob, box=False, errors='ignore',
tm.assert_index_equal(
pd.to_datetime(dts_with_oob, errors='ignore',
cache=cache),
np.array(
[dt.item() for dt in dts_with_oob],
dtype='O'
pd.Index(
[dt.item() for dt in dts_with_oob]
)
)

@@ -622,20 +617,16 @@ def test_datetime_invalid_index(self, values, format, infer):

@pytest.mark.parametrize("utc", [True, None])
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
@pytest.mark.parametrize("box", [True, False])
@pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index])
def test_to_datetime_cache(self, utc, format, box, constructor):
def test_to_datetime_cache(self, utc, format, constructor):
date = '20130101 00:00:00'
test_dates = [date] * 10**5
data = constructor(test_dates)
result = pd.to_datetime(data, utc=utc, format=format, box=box,
cache=True)
expected = pd.to_datetime(data, utc=utc, format=format, box=box,
cache=False)
if box:
tm.assert_index_equal(result, expected)
else:
tm.assert_numpy_array_equal(result, expected)

result = pd.to_datetime(data, utc=utc, format=format, cache=True)
expected = pd.to_datetime(data, utc=utc, format=format, cache=False)

tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("utc", [True, None])
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
@@ -684,7 +675,10 @@ def test_iso_8601_strings_with_same_offset(self):
def test_iso_8601_strings_same_offset_no_box(self):
# GH 22446
data = ['2018-01-04 09:01:00+09:00', '2018-01-04 09:02:00+09:00']
result = pd.to_datetime(data, box=False)

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(data, box=False)

expected = np.array([
datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)),
datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540))
@@ -753,6 +747,16 @@ def test_timestamp_utc_true(self, ts, expected):
result = to_datetime(ts, utc=True)
assert result == expected

def test_to_datetime_box_deprecated(self):
expected = np.datetime64('2018-09-09')

# Deprecated - see GH24416
with tm.assert_produces_warning(FutureWarning):
pd.to_datetime(expected, box=False)

result = pd.to_datetime(expected).to_datetime64()
assert result == expected


class TestToDatetimeUnit(object):
@pytest.mark.parametrize('cache', [True, False])
@@ -891,7 +895,7 @@ def test_unit_rounding(self, cache):
def test_unit_ignore_keeps_name(self, cache):
# GH 21697
expected = pd.Index([15e9] * 2, name='name')
result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
result = pd.to_datetime(expected, errors='ignore', unit='s',
cache=cache)
tm.assert_index_equal(result, expected)

@@ -1052,7 +1056,10 @@ def test_dataframe_box_false(self):
df = pd.DataFrame({'year': [2015, 2016],
'month': [2, 3],
'day': [4, 5]})
result = pd.to_datetime(df, box=False)

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(df, box=False)

expected = np.array(['2015-02-04', '2016-03-05'],
dtype='datetime64[ns]')
tm.assert_numpy_array_equal(result, expected)
@@ -1069,8 +1076,7 @@ def test_dataframe_utc_true(self):

def test_to_datetime_errors_ignore_utc_true(self):
# GH 23758
result = pd.to_datetime([1], unit='s', box=True, utc=True,
errors='ignore')
result = pd.to_datetime([1], unit='s', utc=True, errors='ignore')
expected = DatetimeIndex(['1970-01-01 00:00:01'], tz='UTC')
tm.assert_index_equal(result, expected)

@@ -1188,19 +1194,16 @@ def test_to_datetime_types(self, cache):
# assert result == expected

@pytest.mark.parametrize('cache', [True, False])
@pytest.mark.parametrize('box, klass', [
[True, Index],
[False, np.array]
])
def test_to_datetime_unprocessable_input(self, cache, box, klass):
def test_to_datetime_unprocessable_input(self, cache):
# GH 4928
# GH 21864
result = to_datetime([1, '1'], errors='ignore', cache=cache, box=box)
expected = klass(np.array([1, '1'], dtype='O'))
result = to_datetime([1, '1'], errors='ignore', cache=cache)

expected = Index(np.array([1, '1'], dtype='O'))
tm.assert_equal(result, expected)
msg = "invalid string coercion to datetime"
with pytest.raises(TypeError, match=msg):
to_datetime([1, '1'], errors='raise', cache=cache, box=box)
to_datetime([1, '1'], errors='raise', cache=cache)

def test_to_datetime_other_datetime64_units(self):
# 5/25/2012
89 changes: 55 additions & 34 deletions pandas/tests/indexes/timedeltas/test_tools.py
Original file line number Diff line number Diff line change
@@ -19,15 +19,18 @@ def conv(v):

d1 = np.timedelta64(1, 'D')

assert (to_timedelta('1 days 06:05:01.00003', box=False) ==
conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') +
np.timedelta64(30, 'us')))
assert (to_timedelta('15.5us', box=False) ==
conv(np.timedelta64(15500, 'ns')))
with tm.assert_produces_warning(FutureWarning):
assert (to_timedelta('1 days 06:05:01.00003', box=False) ==
conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') +
np.timedelta64(30, 'us')))

# empty string
result = to_timedelta('', box=False)
assert result.astype('int64') == iNaT
with tm.assert_produces_warning(FutureWarning):
assert (to_timedelta('15.5us', box=False) ==
conv(np.timedelta64(15500, 'ns')))

# empty string
result = to_timedelta('', box=False)
assert result.astype('int64') == iNaT

result = to_timedelta(['', ''])
assert isna(result).all()
@@ -37,10 +40,11 @@ def conv(v):
expected = pd.Index(np.array([np.timedelta64(1, 's')]))
tm.assert_index_equal(result, expected)

# ints
result = np.timedelta64(0, 'ns')
expected = to_timedelta(0, box=False)
assert result == expected
with tm.assert_produces_warning(FutureWarning):
# ints
result = np.timedelta64(0, 'ns')
expected = to_timedelta(0, box=False)
assert result == expected

# Series
expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)])
@@ -53,16 +57,18 @@ def conv(v):
expected = to_timedelta([0, 10], unit='s')
tm.assert_index_equal(result, expected)

# single element conversion
v = timedelta(seconds=1)
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected
with tm.assert_produces_warning(FutureWarning):
# single element conversion
v = timedelta(seconds=1)
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected

v = np.timedelta64(timedelta(seconds=1))
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected
with tm.assert_produces_warning(FutureWarning):
v = np.timedelta64(timedelta(seconds=1))
result = to_timedelta(v, box=False)
expected = np.timedelta64(timedelta(seconds=1))
assert result == expected

# arrays of various dtypes
arr = np.array([1] * 5, dtype='int64')
@@ -90,22 +96,27 @@ def conv(v):
expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5)
tm.assert_index_equal(result, expected)

# Test with lists as input when box=false
expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]')
result = to_timedelta(range(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
# Test with lists as input when box=false
expected = np.array(np.arange(3) * 1000000000,
dtype='timedelta64[ns]')
result = to_timedelta(range(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

result = to_timedelta(np.arange(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
result = to_timedelta(np.arange(3), unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

result = to_timedelta([0, 1, 2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
result = to_timedelta([0, 1, 2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

# Tests with fractional seconds as input:
expected = np.array(
[0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]')
result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)
with tm.assert_produces_warning(FutureWarning):
# Tests with fractional seconds as input:
expected = np.array(
[0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]')
result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False)
tm.assert_numpy_array_equal(expected, result)

def test_to_timedelta_invalid(self):

@@ -188,3 +199,13 @@ def test_to_timedelta_float(self):
result = pd.to_timedelta(arr, unit='s')
expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64')
tm.assert_numpy_array_equal(result.asi8, expected_asi8)

def test_to_timedelta_box_deprecated(self):
result = np.timedelta64(0, 'ns')

# Deprecated - see GH24416
with tm.assert_produces_warning(FutureWarning):
to_timedelta(0, box=False)

expected = to_timedelta(0).to_timedelta64()
assert result == expected
8 changes: 4 additions & 4 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
@@ -318,12 +318,12 @@ def test_iso_conversion(self):
assert to_timedelta('P0DT0H0M1S') == expected

def test_nat_converters(self):
result = to_timedelta('nat', box=False)
assert result.dtype.kind == 'm'
result = to_timedelta('nat').to_numpy()
assert result.dtype.kind == 'M'
assert result.astype('int64') == iNaT

result = to_timedelta('nan', box=False)
assert result.dtype.kind == 'm'
result = to_timedelta('nan').to_numpy()
assert result.dtype.kind == 'M'
assert result.astype('int64') == iNaT

@pytest.mark.filterwarnings("ignore:M and Y units are deprecated")