From 71ffcf5ceec4d1247d11e813129f5f758df64cb7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Jan 2019 14:07:12 -0600 Subject: [PATCH 1/3] Added Datetime & Timedelta inference to array Closes https://github.com/pandas-dev/pandas/issues/24568 --- pandas/core/arrays/array_.py | 32 ++++++++++++++++++-------- pandas/tests/arrays/test_array.py | 38 +++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index 173ed7d191ac9..cd7929cd4ddd5 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -46,12 +46,14 @@ def array(data, # type: Sequence[object] Currently, pandas will infer an extension dtype for sequences of - ========================== ================================== - scalar type Array Type - ========================== ================================== - * :class:`pandas.Interval` :class:`pandas.IntervalArray` - * :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` - ========================== ================================== + ============================= ===================================== + scalar type Array Type + ============================= ===================================== + * :class:`pandas.Interval` :class:`pandas.IntervalArray` + * :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` + * :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` + * :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` + ============================= ===================================== For all other cases, NumPy's usual inference rules will be used. @@ -62,7 +64,8 @@ def array(data, # type: Sequence[object] Returns ------- - array : ExtensionArray + ExtensionArray + The newly created array. Raises ------ @@ -180,7 +183,9 @@ def array(data, # type: Sequence[object] ValueError: Cannot pass scalar '1' to 'pandas.array'. 
""" from pandas.core.arrays import ( - period_array, ExtensionArray, IntervalArray, PandasArray + period_array, ExtensionArray, IntervalArray, PandasArray, + DatetimeArrayMixin, + TimedeltaArrayMixin, ) from pandas.core.internals.arrays import extract_array @@ -220,7 +225,16 @@ def array(data, # type: Sequence[object] # We choose to return an ndarray, rather than raising. pass - # TODO(DatetimeArray): handle this type + elif inferred_dtype == 'datetime': + try: + return DatetimeArrayMixin._from_sequence(data, copy=copy) + except ValueError: + # Mixture of timezones, fall back to PandasArray + pass + + elif inferred_dtype == 'timedelta': + return TimedeltaArrayMixin._from_sequence(data, copy=copy) + # TODO(BooleanArray): handle this type result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 76ef85b0317ad..57ea79ae38ee2 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -1,7 +1,9 @@ +import datetime import decimal import numpy as np import pytest +import pytz from pandas.core.dtypes.dtypes import registry @@ -89,11 +91,38 @@ def test_array_copy(): assert np.shares_memory(a, b._ndarray) is True +cet = pytz.timezone("CET") + + @pytest.mark.parametrize('data, expected', [ + # period ([pd.Period("2000", "D"), pd.Period("2001", "D")], period_array(["2000", "2001"], freq="D")), + + # interval ([pd.Interval(0, 1), pd.Interval(1, 2)], pd.IntervalArray.from_breaks([0, 1, 2])), + + # datetime + ([pd.Timestamp('2000',), pd.Timestamp('2001')], + pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + + ([datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], + pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + + # datetimetz + ([pd.Timestamp('2000', tz='CET'), pd.Timestamp('2001', tz='CET')], + pd.arrays.DatetimeArray._from_sequence( + ['2000', '2001'], dtype=pd.DatetimeTZDtype(tz='CET'))), + + 
([datetime.datetime(2000, 1, 1, tzinfo=cet), + datetime.datetime(2001, 1, 1, tzinfo=cet)], + pd.arrays.DatetimeArray._from_sequence(['2000', '2001'], + tz=cet)), + + # timedelta + ([pd.Timedelta('1H'), pd.Timedelta('2H')], + pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), ]) def test_array_inference(data, expected): result = pd.array(data) @@ -105,6 +134,15 @@ def test_array_inference(data, expected): [pd.Period("2000", "D"), pd.Period("2001", "A")], # mix of closed [pd.Interval(0, 1, closed='left'), pd.Interval(1, 2, closed='right')], + # Mix of timezones + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")], + # Mix of tz-aware and tz-naive + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")], + # GH-24569 + pytest.param( + np.array([pd.Timestamp('2000'), pd.Timestamp('2000', tz='CET')]), + marks=pytest.mark.xfail(reason="bug in DTA._from_sequence") + ), ]) def test_array_inference_fails(data): result = pd.array(data) From c7056fea6c980443f4346f097d42150652b8a28d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Jan 2019 14:34:59 -0600 Subject: [PATCH 2/3] trigger ci From 3d9cebe8b17d66a9425bb68431139be03188f5b3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 2 Jan 2019 15:47:09 -0600 Subject: [PATCH 3/3] Handle datetime64 and timedelta64 --- pandas/core/arrays/array_.py | 6 ++++-- pandas/tests/arrays/test_array.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index cd7929cd4ddd5..4e84c62bce3d6 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -225,14 +225,16 @@ def array(data, # type: Sequence[object] # We choose to return an ndarray, rather than raising. 
pass - elif inferred_dtype == 'datetime': + elif inferred_dtype.startswith('datetime'): + # datetime, datetime64 try: return DatetimeArrayMixin._from_sequence(data, copy=copy) except ValueError: # Mixture of timezones, fall back to PandasArray pass - elif inferred_dtype == 'timedelta': + elif inferred_dtype.startswith('timedelta'): + # timedelta, timedelta64 return TimedeltaArrayMixin._from_sequence(data, copy=copy) # TODO(BooleanArray): handle this type diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 57ea79ae38ee2..1d09a1f65e43f 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -110,6 +110,12 @@ def test_array_copy(): ([datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], pd.arrays.DatetimeArray._from_sequence(['2000', '2001'])), + (np.array([1, 2], dtype='M8[ns]'), + pd.arrays.DatetimeArray(np.array([1, 2], dtype='M8[ns]'))), + + (np.array([1, 2], dtype='M8[us]'), + pd.arrays.DatetimeArray(np.array([1000, 2000], dtype='M8[ns]'))), + # datetimetz ([pd.Timestamp('2000', tz='CET'), pd.Timestamp('2001', tz='CET')], pd.arrays.DatetimeArray._from_sequence( @@ -123,6 +129,13 @@ def test_array_copy(): # timedelta ([pd.Timedelta('1H'), pd.Timedelta('2H')], pd.arrays.TimedeltaArray._from_sequence(['1H', '2H'])), + + (np.array([1, 2], dtype='m8[ns]'), + pd.arrays.TimedeltaArray(np.array([1, 2], dtype='m8[ns]'))), + + (np.array([1, 2], dtype='m8[us]'), + pd.arrays.TimedeltaArray(np.array([1000, 2000], dtype='m8[ns]'))), + ]) def test_array_inference(data, expected): result = pd.array(data)