From 492f3bbfa373f16b6b11a51e9ce32cf14d7ebb7e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 12:16:59 -0700 Subject: [PATCH 1/3] BUG: fix array_equivalent with mismatched tzawareness --- pandas/_libs/lib.pyx | 16 +++++++++++++--- pandas/core/dtypes/missing.py | 10 ++++++++-- pandas/core/indexes/base.py | 9 +++------ pandas/tests/dtypes/test_missing.py | 9 +++++++++ 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 594de703258a4..841b453bcd6be 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -55,8 +55,7 @@ cimport pandas._libs.util as util from pandas._libs.util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN from pandas._libs.tslib import array_to_datetime -from pandas._libs.tslibs.nattype cimport NPY_NAT -from pandas._libs.tslibs.nattype import NaT +from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.conversion cimport convert_to_tsobject from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare @@ -523,9 +522,20 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: x = left[i] y = right[i] + # Avoid raising TypeError on tzawareness mismatch + if PyDateTime_Check(x) and PyDateTime_Check(y): + if x is NaT and y is NaT: + pass + elif x.tzinfo is None and y.tzinfo is not None: + return False + elif x.tzinfo is not None and y.tzinfo is None: + return False + elif x != y: + return False + # we are either not equal or both nan # I think None == None will be true here - if not (PyObject_RichCompareBool(x, y, Py_EQ) or + elif not (PyObject_RichCompareBool(x, y, Py_EQ) or (x is None or is_nan(x)) and (y is None or is_nan(y))): return False return True diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 6dd032b9248ed..cd87fbef02e4f 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ 
-445,8 +445,14 @@ def array_equivalent(left, right, strict_nan=False): if not isinstance(right_value, float) or not np.isnan(right_value): return False else: - if np.any(left_value != right_value): - return False + try: + if np.any(left_value != right_value): + return False + except TypeError as err: + if "Cannot compare tz-naive" in str(err): + # tzawareness compat failure, see GH#28507 + return False + raise return True # NaNs can occur in float and complex arrays. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6ef9d78ff9e97..c7e9dd5f0ea6d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4324,12 +4324,9 @@ def equals(self, other): # if other is not object, use other's logic for coercion return other.equals(self) - try: - return array_equivalent( - com.values_from_object(self), com.values_from_object(other) - ) - except Exception: - return False + return array_equivalent( + com.values_from_object(self), com.values_from_object(other) + ) def identical(self, other): """ diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 1a292d5bfcbb6..2b1b23116b86c 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -332,6 +332,15 @@ def test_array_equivalent(): assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) +def test_array_equivalent_tzawareness(): + # we shouldn't raise if comparing tzaware and tznaive datetimes + left = np.array([pd.Timestamp.now()], dtype=object) + right = np.array([pd.Timestamp.now("UTC")], dtype=object) + + assert not array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right, strict_nan=False) + + def test_array_equivalent_compat(): # see gh-13388 m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) From 73d85d52d129e8b33aeabc437fab8f01a7c6e8ae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 14:24:07 -0700 Subject: 
[PATCH 2/3] Fix datetime64 case --- pandas/_libs/lib.pyx | 24 +++++++++++------------- pandas/tests/dtypes/test_missing.py | 22 +++++++++++++++++++--- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 841b453bcd6be..1c2f80b832201 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -522,22 +522,20 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: x = left[i] y = right[i] - # Avoid raising TypeError on tzawareness mismatch - if PyDateTime_Check(x) and PyDateTime_Check(y): - if x is NaT and y is NaT: - pass - elif x.tzinfo is None and y.tzinfo is not None: - return False - elif x.tzinfo is not None and y.tzinfo is None: + # we are either not equal or both nan + # I think None == None will be true here + try: + if not (PyObject_RichCompareBool(x, y, Py_EQ) or + (x is None or is_nan(x)) and (y is None or is_nan(y))): return False - elif x != y: + except TypeError as err: + # Avoid raising TypeError on tzawareness mismatch + # TODO: This try/except can be removed if/when Timestamp + # comparisons are changed to match datetime, see GH#28507 + if "tz-naive and tz-aware" in str(err): return False + raise - # we are either not equal or both nan - # I think None == None will be true here - elif not (PyObject_RichCompareBool(x, y, Py_EQ) or - (x is None or is_nan(x)) and (y is None or is_nan(y))): - return False return True diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 2b1b23116b86c..2f2e098d1982f 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -332,10 +332,26 @@ def test_array_equivalent(): assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) -def test_array_equivalent_tzawareness(): +@pytest.mark.parametrize( + "lvalue", + [ + pd.Timestamp.now(), + pd.Timestamp.now().to_datetime64(), + pd.Timestamp.now().to_pydatetime(), + ], +) 
+@pytest.mark.parametrize( + "rvalue", + [ + pd.Timestamp.now("UTC"), + pd.Timestamp.now().to_datetime64(), + pd.Timestamp.now("UTC").to_pydatetime(), + ], +) +def test_array_equivalent_tzawareness(lvalue, rvalue): # we shouldn't raise if comparing tzaware and tznaive datetimes - left = np.array([pd.Timestamp.now()], dtype=object) - right = np.array([pd.Timestamp.now("UTC")], dtype=object) + left = np.array([lvalue], dtype=object) + right = np.array([rvalue], dtype=object) assert not array_equivalent(left, right, strict_nan=True) assert not array_equivalent(left, right, strict_nan=False) From 76237c95cfb1c0db54f3daf15515995869e3bdd0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 16:04:31 -0700 Subject: [PATCH 3/3] exclude datetime64 utcnow --- pandas/tests/dtypes/test_missing.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 2f2e098d1982f..25b447e1df7d4 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -25,6 +25,9 @@ from pandas import DatetimeIndex, Float64Index, NaT, Series, TimedeltaIndex, date_range from pandas.util import testing as tm +now = pd.Timestamp.now() +utcnow = pd.Timestamp.now("UTC") + @pytest.mark.parametrize("notna_f", [notna, notnull]) def test_notna_notnull(notna_f): @@ -333,19 +336,17 @@ def test_array_equivalent(): @pytest.mark.parametrize( - "lvalue", - [ - pd.Timestamp.now(), - pd.Timestamp.now().to_datetime64(), - pd.Timestamp.now().to_pydatetime(), - ], -) -@pytest.mark.parametrize( - "rvalue", + "lvalue, rvalue", [ - pd.Timestamp.now("UTC"), - pd.Timestamp.now().to_datetime64(), - pd.Timestamp.now("UTC").to_pydatetime(), + # There are 3 variants for each of lvalue and rvalue. We include all + # three for the tz-naive `now` and exclude the datetime64 variant + # for utcnow because it drops tzinfo. 
+ (now, utcnow), + (now.to_datetime64(), utcnow), + (now.to_pydatetime(), utcnow), + (now, utcnow), + (now.to_datetime64(), utcnow.to_pydatetime()), + (now.to_pydatetime(), utcnow.to_pydatetime()), ], ) def test_array_equivalent_tzawareness(lvalue, rvalue):