From bc34fbca73aacc7ac2cb837edeb0cc52bdad9e08 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 3 Mar 2014 21:21:19 -0800 Subject: [PATCH 1/6] Create Index objects even if dates are out of bounds Occasionally, it can be useful to work with datetime objects that cannot be expressed as np.datetime64 objects with ns precision. PeriodIndex is currently the only way to make a pandas.Index object consisting of such datetime objects. But sometimes we really want our index objects to consist of datetimes, even if they are a less efficient way to represent dates. This patch allows for falling back to creating generic Index objects if an OutOfBoundsDatetime exception is encountered when attempting to create an Index. --- pandas/core/index.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index c16e2eff06904..d5252ff0ee834 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -111,7 +111,11 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, if isinstance(data, (np.ndarray, ABCSeries)): if issubclass(data.dtype.type, np.datetime64): from pandas.tseries.index import DatetimeIndex - result = DatetimeIndex(data, copy=copy, name=name, **kwargs) + try: + result = DatetimeIndex(data, copy=copy, name=name, + **kwargs) + except tslib.OutOfBoundsDatetime: + pass if dtype is not None and _o_dtype == dtype: return Index(result.to_pydatetime(), dtype=_o_dtype) else: @@ -154,7 +158,11 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, if (inferred.startswith('datetime') or tslib.is_timestamp_array(subarr)): from pandas.tseries.index import DatetimeIndex - return DatetimeIndex(data, copy=copy, name=name, **kwargs) + try: + return DatetimeIndex(data, copy=copy, name=name, + **kwargs) + except tslib.OutOfBoundsDatetime: + pass elif inferred == 'period': return PeriodIndex(subarr, name=name, **kwargs) From d8fe97d1909db44185cd3eb46a5542bc21810171 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 3 Mar 2014 21:59:53 -0800 Subject: [PATCH 2/6] Unit test for Index constructor OutOfBoundsDatetime --- pandas/tests/test_index.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 3e578a5e36bb1..bc7313871a1bc 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -171,6 +171,16 @@ def test_constructor_from_series(self): result = pd.infer_freq(df['date']) self.assertEqual(result,'MS') + def test_constructor_out_of_bounds_datetime(self): + arr = np.array([datetime(1000, 1, 1)]) + result = Index(arr) + self.assert_numpy_array_equal(arr, result.values) + + _skip_if_need_numpy_1_7() + arr = np.array([np.datetime64(datetime(1000, 1, 1))]) + result = Index(arr) + self.assert_numpy_array_equal(arr, result.values) + def test_index_ctor_infer_periodindex(self): from pandas import period_range, PeriodIndex xp = period_range('2012-1-1', freq='M', periods=3) From 0f9562c9ee3781f500aa5d7b0939c460cfddcb9f Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 3 Mar 2014 22:29:44 -0800 Subject: [PATCH 3/6] Refined constructor and more test cases --- pandas/core/index.py | 2 +- pandas/tests/test_index.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index d5252ff0ee834..842864d2ecd64 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -161,7 +161,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, try: return DatetimeIndex(data, copy=copy, name=name, **kwargs) - except tslib.OutOfBoundsDatetime: + except (tslib.OutOfBoundsDatetime, ValueError): pass elif inferred == 'period': return PeriodIndex(subarr, name=name, **kwargs) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index bc7313871a1bc..6da19d08feb55 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -172,14 +172,16 @@ def test_constructor_from_series(self): self.assertEqual(result,'MS') def test_constructor_out_of_bounds_datetime(self): - arr = np.array([datetime(1000, 1, 1)]) - result = Index(arr) - self.assert_numpy_array_equal(arr, result.values) + for arr in [[datetime(1000, 1, 1)], + np.array([datetime(1000, 1, 1)])]: + result = Index(arr) + self.assert_numpy_array_equal(arr, result.values) _skip_if_need_numpy_1_7() - arr = np.array([np.datetime64(datetime(1000, 1, 1))]) - result = Index(arr) - self.assert_numpy_array_equal(arr, result.values) + for arr in [[np.datetime64(datetime(1000, 1, 1))], + np.array([np.datetime64(datetime(1000, 1, 1))])]: + result = Index(arr) + self.assert_numpy_array_equal(arr, result.values) def test_index_ctor_infer_periodindex(self): from pandas import period_range, PeriodIndex From 2bb6230871a42feb8dfc48ed8aa9761396791aab Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 3 Mar 2014 22:46:30 -0800 Subject: [PATCH 4/6] Caught edge case and fixed tests --- pandas/core/index.py | 7 ++++--- pandas/tests/test_index.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 842864d2ecd64..559d6611124cf 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -116,10 +116,11 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False, **kwargs) except tslib.OutOfBoundsDatetime: pass - if dtype is not None and _o_dtype == dtype: - return Index(result.to_pydatetime(), dtype=_o_dtype) else: - return result + if dtype is not None and _o_dtype == dtype: + return Index(result.to_pydatetime(), dtype=_o_dtype) + else: + return result elif issubclass(data.dtype.type, np.timedelta64): return Int64Index(data, copy=copy, name=name) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 6da19d08feb55..f0c8064d52984 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -172,16 +172,16 @@ def test_constructor_from_series(self): self.assertEqual(result,'MS') def test_constructor_out_of_bounds_datetime(self): - for arr in [[datetime(1000, 1, 1)], - np.array([datetime(1000, 1, 1)])]: - result = Index(arr) - self.assert_numpy_array_equal(arr, result.values) + expected = np.array([datetime(1000, 1, 1)]) + self.assert_numpy_array_equal(expected, Index(expected).values) + expected_list = [datetime(1000, 1, 1)] + self.assert_numpy_array_equal(expected, Index(expected_list).values) _skip_if_need_numpy_1_7() - for arr in [[np.datetime64(datetime(1000, 1, 1))], - np.array([np.datetime64(datetime(1000, 1, 1))])]: - result = Index(arr) - self.assert_numpy_array_equal(arr, result.values) + expected = np.array([np.datetime64(datetime(1000, 1, 1))]) + self.assert_numpy_array_equal(expected, Index(expected).values) + expected_list = [np.datetime64(datetime(1000, 1, 1))] + self.assert_numpy_array_equal(expected, Index(expected_list).values) def test_index_ctor_infer_periodindex(self): from pandas import period_range, PeriodIndex From 6cca929a8b0f9a27ec7bf21289d56f098032ed52 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 4 Mar 2014 00:51:52 -0800 Subject: [PATCH 5/6] more tweaked tests --- pandas/tests/test_index.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index f0c8064d52984..dfd1090b84b76 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -181,6 +181,8 @@ def test_constructor_out_of_bounds_datetime(self): expected = np.array([np.datetime64(datetime(1000, 1, 1))]) self.assert_numpy_array_equal(expected, Index(expected).values) expected_list = [np.datetime64(datetime(1000, 1, 1))] + print expected.shape, Index(expected_list).values.shape + print expected.dtype, Index(expected_list).values.dtype self.assert_numpy_array_equal(expected, Index(expected_list).values) def test_index_ctor_infer_periodindex(self): From bb53ee5a96ea96b81dd91b323167d2e2ffa7b77a Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 4 Mar 2014 01:03:32 -0800 Subject: [PATCH 6/6] fix print function --- pandas/tests/test_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index dfd1090b84b76..c3275cdd96de2 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -181,8 +181,8 @@ def test_constructor_out_of_bounds_datetime(self): expected = np.array([np.datetime64(datetime(1000, 1, 1))]) self.assert_numpy_array_equal(expected, Index(expected).values) expected_list = [np.datetime64(datetime(1000, 1, 1))] - print expected.shape, Index(expected_list).values.shape - print expected.dtype, Index(expected_list).values.dtype + print(expected.shape, Index(expected_list).values.shape) + print(expected.dtype, Index(expected_list).values.dtype) self.assert_numpy_array_equal(expected, Index(expected_list).values) def test_index_ctor_infer_periodindex(self):