Skip to content

Commit 6cd8e0f

Browse files
committed
BUG: More followups on to_datetime exceptions, xref #13033
closes #13059
1 parent c6110e2 commit 6cd8e0f

File tree

4 files changed

+78
-21
lines changed

4 files changed

+78
-21
lines changed

doc/source/whatsnew/v0.18.1.txt

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ In addition to this error change, several others have been made as well:
478478
``to_datetime`` error changes
479479
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
480480

481-
Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'`` (:issue:`11758`, :issue:`13052`)
481+
Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'``. Furthermore, an ``OutOfBoundsDatetime`` exception will be raised when an out-of-range value is encountered for that unit when ``errors='raise'``. (:issue:`11758`, :issue:`13052`, :issue:`13059`)
482482

483483
Previous behaviour:
484484

@@ -490,12 +490,21 @@ Previous behaviour:
490490
In [28]: pd.to_datetime(11111111, unit='D', errors='ignore')
491491
OverflowError: Python int too large to convert to C long
492492

493+
In [29]: pd.to_datetime(11111111, unit='D', errors='raise')
494+
OverflowError: Python int too large to convert to C long
495+
493496
New behaviour:
494497

495-
.. ipython:: python
498+
.. code-block:: python
499+
500+
In [2]: pd.to_datetime(1420043460, unit='s', errors='coerce')
501+
Out[2]: Timestamp('2014-12-31 16:31:00')
502+
503+
In [3]: pd.to_datetime(11111111, unit='D', errors='ignore')
504+
Out[3]: 11111111
496505

497-
pd.to_datetime(1420043460, unit='s', errors='coerce')
498-
pd.to_datetime(11111111, unit='D', errors='ignore')
506+
In [4]: pd.to_datetime(11111111, unit='D', errors='raise')
507+
OutOfBoundsDatetime: cannot convert input with unit 'D'
499508

500509
.. _whatsnew_0181.api.other:
501510

pandas/tseries/tests/test_timeseries.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4249,7 +4249,7 @@ def test_unit_errors(self):
42494249
'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
42504250
tm.assert_index_equal(result, expected)
42514251

4252-
with self.assertRaises(ValueError):
4252+
with self.assertRaises(tslib.OutOfBoundsDatetime):
42534253
to_datetime(values, unit='D', errors='raise')
42544254

42554255
values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT']
@@ -4263,9 +4263,33 @@ def test_unit_errors(self):
42634263
expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
42644264
tm.assert_index_equal(result, expected)
42654265

4266-
with self.assertRaises(ValueError):
4266+
with self.assertRaises(tslib.OutOfBoundsDatetime):
42674267
to_datetime(values, errors='raise', unit='s')
42684268

4269+
# if we have a string, then we raise a ValueError
4270+
# and NOT an OutOfBoundsDatetime
4271+
for val in ['foo', Timestamp('20130101')]:
4272+
try:
4273+
to_datetime(val, errors='raise', unit='s')
4274+
except tslib.OutOfBoundsDatetime:
4275+
raise AssertionError("incorrect exception raised")
4276+
except ValueError:
4277+
pass
4278+
4279+
# consistency of conversions
4280+
expected = Timestamp('1970-05-09 14:25:11')
4281+
result = pd.to_datetime(11111111, unit='s', errors='raise')
4282+
self.assertEqual(result, expected)
4283+
self.assertIsInstance(result, Timestamp)
4284+
4285+
result = pd.to_datetime(11111111, unit='s', errors='coerce')
4286+
self.assertEqual(result, expected)
4287+
self.assertIsInstance(result, Timestamp)
4288+
4289+
result = pd.to_datetime(11111111, unit='s', errors='ignore')
4290+
self.assertEqual(result, expected)
4291+
self.assertIsInstance(result, Timestamp)
4292+
42694293
def test_roundtrip(self):
42704294

42714295
# test value to string and back conversions

pandas/tseries/tools.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def _convert_listlike(arg, box, format, name=None):
332332
if box:
333333
if errors == 'ignore':
334334
from pandas import Index
335-
return Index(result, dtype=object)
335+
return Index(result)
336336

337337
return DatetimeIndex(result, tz='utc' if utc else None,
338338
name=name)

pandas/tslib.pyx

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,9 +1982,12 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
19821982
"""
19831983
convert the ndarray according to the unit
19841984
if errors:
1985-
- raise: return converted values or raise
1985+
- raise: return converted values or raise OutOfBoundsDatetime
1986+
if out of range on the conversion or
1987+
ValueError for other conversions (e.g. a string)
19861988
- ignore: return non-convertible values as the same unit
19871989
- coerce: NaT for non-convertibles
1990+
19881991
"""
19891992
cdef:
19901993
Py_ssize_t i, j, n=len(values)
@@ -2023,7 +2026,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
20232026
if not need_to_iterate:
20242027

20252028
if (fvalues < _NS_LOWER_BOUND).any() or (fvalues > _NS_UPPER_BOUND).any():
2026-
raise ValueError("cannot convert input with unit: {0}".format(unit))
2029+
raise OutOfBoundsDatetime("cannot convert input with unit '{0}'".format(unit))
20272030
result = (iresult*m).astype('M8[ns]')
20282031
iresult = result.view('i8')
20292032
iresult[mask] = iNaT
@@ -2046,9 +2049,14 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
20462049
else:
20472050
try:
20482051
iresult[i] = cast_from_unit(val, unit)
2049-
except:
2050-
if is_ignore or is_raise:
2051-
raise
2052+
except OverflowError:
2053+
if is_raise:
2054+
raise OutOfBoundsDatetime("cannot convert input {0}"
2055+
"with the unit '{1}'".format(
2056+
val,
2057+
unit))
2058+
elif is_ignore:
2059+
raise AssertionError
20522060
iresult[i] = NPY_NAT
20532061

20542062
elif util.is_string_object(val):
@@ -2058,24 +2066,40 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
20582066
else:
20592067
try:
20602068
iresult[i] = cast_from_unit(float(val), unit)
2069+
except ValueError:
2070+
if is_raise:
2071+
raise ValueError("non convertible value {0}"
2072+
"with the unit '{1}'".format(
2073+
val,
2074+
unit))
2075+
elif is_ignore:
2076+
raise AssertionError
20612077
except:
2062-
if is_ignore or is_raise:
2063-
raise
2078+
if is_raise:
2079+
raise OutOfBoundsDatetime("cannot convert input {0}"
2080+
"with the unit '{1}'".format(
2081+
val,
2082+
unit))
2083+
elif is_ignore:
2084+
raise AssertionError
20642085
iresult[i] = NPY_NAT
20652086

20662087
else:
20672088

2068-
if is_ignore or is_raise:
2069-
raise ValueError
2089+
if is_raise:
2090+
raise ValueError("non convertible value {0}"
2091+
"with the unit '{1}'".format(
2092+
val,
2093+
unit))
2094+
if is_ignore:
2095+
raise AssertionError
2096+
20702097
iresult[i] = NPY_NAT
20712098

20722099
return result
20732100

2074-
except (OverflowError, ValueError) as e:
2075-
2076-
# we cannot process and are done
2077-
if is_raise:
2078-
raise ValueError("cannot convert input with the unit: {0}".format(unit))
2101+
except AssertionError:
2102+
pass
20792103

20802104
# we have hit an exception
20812105
# and are in ignore mode

0 commit comments

Comments
 (0)