Skip to content

Commit 286782d

Browse files
committed
BUG: to_datetime when called with a unit and coerce is buggy
closes #11758 Author: Jeff Reback <[email protected]> Closes #13033 from jreback/to_datetime and squashes the following commits: ed3cdf0 [Jeff Reback] BUG: to_datetime when called with a unit and coerce is buggy
1 parent 5f3facf commit 286782d

File tree

5 files changed

+231
-26
lines changed

5 files changed

+231
-26
lines changed

doc/source/whatsnew/v0.18.1.txt

+24-2
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,30 @@ In addition to this error change, several others have been made as well:
428428
- ``pd.read_csv()`` no longer allows a combination of strings and integers for the ``usecols`` parameter (:issue:`12678`)
429429

430430

431+
.. _whatsnew_0181.api.to_datetime:
432+
433+
``to_datetime`` error changes
434+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
435+
436+
Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'``, or with non-convertible entries and ``errors='ignore'``, have been fixed (:issue:`11758`)
437+
438+
Previous behaviour:
439+
440+
.. code-block:: python
441+
442+
In [27]: pd.to_datetime(1420043460, unit='s', errors='coerce')
443+
Out[27]: NaT
444+
445+
In [28]: pd.to_datetime(11111111, unit='D', errors='ignore')
446+
OverflowError: Python int too large to convert to C long
447+
448+
New behaviour:
449+
450+
.. ipython:: python
451+
452+
pd.to_datetime(1420043460, unit='s', errors='coerce')
453+
pd.to_datetime(11111111, unit='D', errors='ignore')
454+
431455
.. _whatsnew_0181.api.other:
432456

433457
Other API changes
@@ -444,7 +468,6 @@ Other API changes
444468
- ``pd.concat(ignore_index=True)`` now uses ``RangeIndex`` as default (:issue:`12695`)
445469
- ``pd.merge()`` and ``DataFrame.join()`` will show a ``UserWarning`` when merging/joining a single- with a multi-leveled dataframe (:issue:`9455`, :issue:`12219`)
446470

447-
448471
.. _whatsnew_0181.deprecations:
449472

450473
Deprecations
@@ -514,7 +537,6 @@ Bug Fixes
514537
- Bug in aligning a ``Series`` with a ``DataFrame`` (:issue:`13037`)
515538

516539

517-
518540
- Bug in consistency of ``.name`` on ``.groupby(..).apply(..)`` cases (:issue:`12363`)
519541

520542

pandas/io/json.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def _try_convert_to_date(self, data):
400400
try:
401401
new_data = to_datetime(new_data, errors='raise',
402402
unit=date_unit)
403-
except OverflowError:
403+
except ValueError:
404404
continue
405405
except:
406406
break

pandas/tseries/tests/test_timeseries.py

+39
Original file line numberDiff line numberDiff line change
@@ -4151,6 +4151,7 @@ def test_basics_nanos(self):
41514151
self.assertEqual(stamp.nanosecond, 500)
41524152

41534153
def test_unit(self):
4154+
41544155
def check(val, unit=None, h=1, s=1, us=0):
41554156
stamp = Timestamp(val, unit=unit)
41564157
self.assertEqual(stamp.year, 2000)
@@ -4217,6 +4218,44 @@ def check(val, unit=None, h=1, s=1, us=0):
42174218
result = Timestamp('NaT')
42184219
self.assertIs(result, NaT)
42194220

4221+
def test_unit_errors(self):
4222+
# GH 11758
4223+
# test proper behavior with errors
4224+
4225+
with self.assertRaises(ValueError):
4226+
to_datetime([1], unit='D', format='%Y%m%d')
4227+
4228+
values = [11111111, 1, 1.0, tslib.iNaT, pd.NaT, np.nan,
4229+
'NaT', '']
4230+
result = to_datetime(values, unit='D', errors='ignore')
4231+
expected = Index([11111111, Timestamp('1970-01-02'),
4232+
Timestamp('1970-01-02'), pd.NaT,
4233+
pd.NaT, pd.NaT, pd.NaT, pd.NaT],
4234+
dtype=object)
4235+
tm.assert_index_equal(result, expected)
4236+
4237+
result = to_datetime(values, unit='D', errors='coerce')
4238+
expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
4239+
'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
4240+
tm.assert_index_equal(result, expected)
4241+
4242+
with self.assertRaises(ValueError):
4243+
to_datetime(values, unit='D', errors='raise')
4244+
4245+
values = [1420043460000, tslib.iNaT, pd.NaT, np.nan, 'NaT']
4246+
4247+
result = to_datetime(values, errors='ignore', unit='s')
4248+
expected = Index([1420043460000, pd.NaT, pd.NaT,
4249+
pd.NaT, pd.NaT], dtype=object)
4250+
tm.assert_index_equal(result, expected)
4251+
4252+
result = to_datetime(values, errors='coerce', unit='s')
4253+
expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
4254+
tm.assert_index_equal(result, expected)
4255+
4256+
with self.assertRaises(ValueError):
4257+
to_datetime(values, errors='raise', unit='s')
4258+
42204259
def test_roundtrip(self):
42214260

42224261
# test value to string and back conversions

pandas/tseries/tools.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def _guess_datetime_format_for_array(arr, **kwargs):
170170
mapping={True: 'coerce', False: 'raise'})
171171
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
172172
utc=None, box=True, format=None, exact=True, coerce=None,
173-
unit='ns', infer_datetime_format=False):
173+
unit=None, infer_datetime_format=False):
174174
"""
175175
Convert argument to datetime.
176176
@@ -293,7 +293,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
293293

294294
def _to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
295295
utc=None, box=True, format=None, exact=True,
296-
unit='ns', freq=None, infer_datetime_format=False):
296+
unit=None, freq=None, infer_datetime_format=False):
297297
"""
298298
Same as to_datetime, but accept freq for
299299
DatetimeIndex internal construction
@@ -323,9 +323,17 @@ def _convert_listlike(arg, box, format, name=None):
323323
arg = arg.tz_convert(None).tz_localize('UTC')
324324
return arg
325325

326-
elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
327-
result = arg.astype('datetime64[ns]')
326+
elif unit is not None:
327+
if format is not None:
328+
raise ValueError("cannot specify both format and unit")
329+
arg = getattr(arg, 'values', arg)
330+
result = tslib.array_with_unit_to_datetime(arg, unit,
331+
errors=errors)
328332
if box:
333+
if errors == 'ignore':
334+
from pandas import Index
335+
return Index(result, dtype=object)
336+
329337
return DatetimeIndex(result, tz='utc' if utc else None,
330338
name=name)
331339
return result
@@ -387,7 +395,6 @@ def _convert_listlike(arg, box, format, name=None):
387395
dayfirst=dayfirst,
388396
yearfirst=yearfirst,
389397
freq=freq,
390-
unit=unit,
391398
require_iso8601=require_iso8601
392399
)
393400

0 commit comments

Comments
 (0)