From b42d1dc577be0e1e4bcf8db7b6bddf0b03c158f1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 1 May 2016 15:35:54 -0400 Subject: [PATCH] BUG: introduced in #13033 closes #13052 --- doc/source/whatsnew/v0.18.1.txt | 2 +- pandas/tseries/tests/test_timeseries.py | 10 +++++ pandas/tslib.pyx | 55 ++++++++++++------------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 3345cb3d29926..1a79601bee384 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -478,7 +478,7 @@ In addition to this error change, several others have been made as well: ``to_datetime`` error changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'`` (:issue:`11758`) +Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'`` (:issue:`11758`, :issue:`13052`) Previous behaviour: diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 034c31b33bce8..15e9136d78243 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -752,6 +752,16 @@ def test_to_datetime_unit(self): seconds=t) for t in range(20)] + [NaT]) assert_series_equal(result, expected) + result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D') + expected = DatetimeIndex([Timestamp('1970-01-02'), + Timestamp('1970-01-03')] + ['NaT'] * 3) + tm.assert_index_equal(result, expected) + + with self.assertRaises(ValueError): + to_datetime([1, 2, 'foo'], unit='D') + with self.assertRaises(ValueError): + to_datetime([1, 2, 111111111], unit='D') + def test_series_ctor_datetime64(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') dates = np.asarray(rng) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 56c0dc875f7bf..9b7942400d3a9 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1992,6 +1992,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): ndarray[float64_t] fvalues ndarray mask bint is_ignore=errors=='ignore', is_coerce=errors=='coerce', is_raise=errors=='raise' + bint need_to_iterate=True ndarray[int64_t] iresult ndarray[object] oresult @@ -2006,33 +2007,28 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): if is_raise: - # we can simply raise if there is a conversion - # issue; but we need to mask the nulls - # we need to guard against out-of-range conversions - # to i8 + # try a quick conversion to i8 + # if we have nulls that are not type-compat + # then need to iterate try: iresult = values.astype('i8') mask = iresult == iNaT iresult[mask] = 0 + fvalues = iresult.astype('f8') * m + need_to_iterate=False except: + pass - # we have nulls embedded - from pandas import isnull - - values = values.astype('object') - mask = isnull(values) - values[mask] = 0 - iresult = values.astype('i8') + # check the bounds + if not need_to_iterate: - fvalues = iresult.astype('f8') * m - if (fvalues < _NS_LOWER_BOUND).any() or (fvalues > _NS_UPPER_BOUND).any(): - raise ValueError("cannot convert input with unit: {0}".format(unit)) - result = (values*m).astype('M8[ns]') - iresult = result.view('i8') - iresult[mask] = iNaT - return result + if (fvalues < _NS_LOWER_BOUND).any() or (fvalues > _NS_UPPER_BOUND).any(): + raise ValueError("cannot convert input with unit: {0}".format(unit)) + result = (iresult*m).astype('M8[ns]') + iresult = result.view('i8') + iresult[mask] = iNaT + return result - # coerce or ignore result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -2051,7 +2047,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): try: iresult[i] = cast_from_unit(val, unit) except: - if is_ignore: + if is_ignore or is_raise: raise iresult[i] = NPY_NAT @@ -2063,24 +2059,27 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): try: iresult[i] = cast_from_unit(float(val), unit) except: - if is_ignore: + if is_ignore or is_raise: raise iresult[i] = NPY_NAT else: - if is_ignore: - raise Exception + if is_ignore or is_raise: + raise ValueError iresult[i] = NPY_NAT return result - except: - pass + except (OverflowError, ValueError) as e: + + # we cannot process and are done + if is_raise: + raise ValueError("cannot convert input with the unit: {0}".format(unit)) - # we have hit an exception - # and are in ignore mode - # redo as object + # we have hit an exception + # and are in ignore mode + # redo as object oresult = np.empty(n, dtype=object) for i in range(n):