pandas-dev · jreback · Aug 14, 2018 · Aug 2, 2018 · Aug 2, 2018 · Aug 2, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -216,7 +216,7 @@ New Behavior:
    idx = pd.interval_range(0, 4)
    idx.values
 
-This mirrors ``CateogricalIndex.values``, which returns a ``Categorical``.
+This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``.
 
 For situations where you need an ``ndarray`` of ``Interval`` objects, use
 :meth:`numpy.asarray` or ``idx.astype(object)``.
@@ -406,6 +406,34 @@ Previous Behavior:
     In [3]: pi - pi[0]
     Out[3]: Int64Index([0, 1, 2], dtype='int64')
 
+
+.. _whatsnew_0240.api.timedelta64_subtract_nan
+
+Addition/Subtraction of ``NaN`` from :class:``DataFrame``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Adding or subtracting ``NaN`` from a :class:`DataFrame` column with
+`timedelta64[ns]` dtype will now raise a ``TypeError`` instead of returning
+all-``NaT``.  This is for compatibility with ``TimedeltaIndex`` and
+``Series`` behavior (:issue:`22163`)
+
+.. ipython:: python
+
+    df = pd.DataFrame([pd.Timedelta(days=1)])
+    df - np.nan
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+    In [4]: df = pd.DataFrame([pd.Timedelta(days=1)])
+
+    In [5]: df - np.nan
+    Out[5]:
+        0
+    0 NaT
+
+
 .. _whatsnew_0240.api.extension:
 
 ExtensionType Changes
@@ -538,6 +566,16 @@ Datetimelike
 - Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`)
 - Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`)
 - Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`,:issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`,:issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`,:issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`,:issue:`22163`)
+- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`,:issue:`22163`)
+- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
+- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
+- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
+- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`)
+-
 
 Timedelta
 ^^^^^^^^^
@@ -585,6 +623,7 @@ Numeric
   when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``),
   a ``TypeError`` was wrongly raised. For all three methods such calculation are now done correctly. (:issue:`16679`).
 - Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`)
+- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`,:issue:`22163`)
 -
 
 Strings

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4936,6 +4936,14 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
         return self._constructor(new_data)
 
     def _combine_const(self, other, func, errors='raise', try_cast=True):
+        if lib.is_scalar(other) or np.ndim(other) == 0:
+            new_data = {i: func(self.iloc[:, i], other)
+                        for i, col in enumerate(self.columns)}
+
+            result = self._constructor(new_data, index=self.index, copy=False)
+            result.columns = self.columns
+            return result
+
         new_data = self._data.eval(func=func, other=other,
                                    errors=errors,
                                    try_cast=try_cast)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1350,7 +1350,7 @@ def na_op(x, y):
                 with np.errstate(all='ignore'):
                     result = method(y)
                 if result is NotImplemented:
-                    raise TypeError("invalid type comparison")
+                    return invalid_comparison(x, y, op)
             else:
                 result = op(x, y)
 
@@ -1366,6 +1366,10 @@ def wrapper(self, other, axis=None):
 
         res_name = get_op_result_name(self, other)
 
+        if isinstance(other, list):
+            # TODO: same for tuples?
+            other = np.asarray(other)
+
         if isinstance(other, ABCDataFrame):  # pragma: no cover
             # Defer to DataFrame implementation; fail early
             return NotImplemented
@@ -1459,8 +1463,6 @@ def wrapper(self, other, axis=None):
 
         else:
             values = self.get_values()
-            if isinstance(other, list):
-                other = np.asarray(other)
 
             with np.errstate(all='ignore'):
                 res = na_op(values, other)
@@ -1741,7 +1743,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
             if fill_value is not None:
                 self = self.fillna(fill_value)
 
-            return self._combine_const(other, na_op, try_cast=True)
+            pass_op = op if lib.is_scalar(other) else na_op
+            return self._combine_const(other, pass_op, try_cast=True)
 
     f.__name__ = op_name
 

diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
@@ -63,6 +63,15 @@ def test_tz_aware_scalar_comparison(self, timestamps):
         expected = pd.DataFrame({'test': [False, False]})
         tm.assert_frame_equal(df == -1, expected)
 
+    def test_dt64_nat_comparison(self):
+        # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly
+        ts = pd.Timestamp.now()
+        df = pd.DataFrame([ts, pd.NaT])
+        expected = pd.DataFrame([True, False])
+
+        result = df == ts
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDatetime64SeriesComparison(object):
     # TODO: moved from tests.series.test_operators; needs cleanup
@@ -640,10 +649,22 @@ def test_dti_cmp_object_dtype(self):
 # Arithmetic
 
 class TestFrameArithmetic(object):
+    def test_dt64arr_sub_dtscalar(self, box):
+        # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype
+        idx = pd.date_range('2013-01-01', periods=3)
+        idx = tm.box_expected(idx, box)
+
+        ts = pd.Timestamp('2013-01-01')
+        # TODO: parametrize over scalar types
+
+        expected = pd.TimedeltaIndex(['0 Days', '1 Day', '2 Days'])
+        expected = tm.box_expected(expected, box)
+
+        result = idx - ts
+        tm.assert_equal(result, expected)
 
-    @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano',
-                       strict=True)
     def test_df_sub_datetime64_not_ns(self):
+        # GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano
         df = pd.DataFrame(pd.date_range('20130101', periods=3))
         dt64 = np.datetime64('2013-01-01')
         assert dt64.dtype == 'datetime64[D]'
@@ -992,9 +1013,11 @@ def test_dti_add_sub_float(self, op, other):
         with pytest.raises(TypeError):
             op(dti, other)
 
-    def test_dti_add_timestamp_raises(self):
+    def test_dti_add_timestamp_raises(self, box):
+        # GH#22163 ensure DataFrame doesn't cast Timestamp to i8
         idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
-        msg = "cannot add DatetimeIndex and Timestamp"
+        idx = tm.box_expected(idx, box)
+        msg = "cannot add"
         with tm.assert_raises_regex(TypeError, msg):
             idx + Timestamp('2011-01-01')
 
@@ -1090,13 +1113,17 @@ def test_dti_add_intarray_no_freq(self, box):
     # -------------------------------------------------------------
     # Binary operations DatetimeIndex and timedelta-like
 
-    def test_dti_add_timedeltalike(self, tz_naive_fixture, delta):
+    def test_dti_add_timedeltalike(self, tz_naive_fixture, delta, box):
+        # GH#22005, GH#22163 check DataFrame doesn't raise TypeError
         tz = tz_naive_fixture
         rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz)
+        rng = tm.box_expected(rng, box)
+
         result = rng + delta
         expected = pd.date_range('2000-01-01 02:00',
                                  '2000-02-01 02:00', tz=tz)
-        tm.assert_index_equal(result, expected)
+        expected = tm.box_expected(expected, box)
+        tm.assert_equal(result, expected)
 
     def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta):
         tz = tz_naive_fixture
@@ -1662,14 +1689,8 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names):
             res3 = dti - other
         tm.assert_series_equal(res3, expected_sub)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        pd.Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_dti_add_offset_tzaware(self, tz_aware_fixture, box):
+        # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype
         timezone = tz_aware_fixture
         if timezone == 'US/Pacific':
             dates = date_range('2012-11-01', periods=3, tz=timezone)

diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -58,13 +58,6 @@ def test_ops_series(self):
         tm.assert_series_equal(expected, td * other)
         tm.assert_series_equal(expected, other * td)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="block.eval incorrect",
-                                             strict=True))
-    ])
     @pytest.mark.parametrize('index', [
         pd.Int64Index(range(1, 11)),
         pd.UInt64Index(range(1, 11)),
@@ -79,7 +72,7 @@ def test_ops_series(self):
     def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box):
         # GH#19333
 
-        if (box is Series and
+        if (box in [Series, pd.DataFrame] and
                 type(scalar_td) is timedelta and index.dtype == 'f8'):
             raise pytest.xfail(reason="Cannot multiply timedelta by float")
 

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -17,6 +17,53 @@
 # Comparisons
 
 class TestFrameComparisons(object):
+    def test_flex_comparison_nat(self):
+        # GH#15697, GH#22163 df.eq(pd.NaT) should behave like df == pd.NaT,
+        # and _definitely_ not be NaN
+        df = pd.DataFrame([pd.NaT])
+
+        result = df == pd.NaT
+        # result.iloc[0, 0] is a np.bool_ object
+        assert result.iloc[0, 0].item() is False
+
+        result = df.eq(pd.NaT)
+        assert result.iloc[0, 0].item() is False
+
+        result = df != pd.NaT
+        assert result.iloc[0, 0].item() is True
+
+        result = df.ne(pd.NaT)
+        assert result.iloc[0, 0].item() is True
+
+    def test_mixed_comparison(self):
+        # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
+        # not raise TypeError
+        # (this appears to be fixed before #22163, not sure when)
+        df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]])
+        other = pd.DataFrame([['a', 'b'], ['c', 'd']])
+
+        result = df == other
+        assert not result.any().any()
+
+        result = df != other
+        assert result.all().all()
+
+    def test_df_numeric_cmp_dt64_raises(self):
+        # GH#8932, GH#22163
+        ts = pd.Timestamp.now()
+        df = pd.DataFrame({'x': range(5)})
+        with pytest.raises(TypeError):
+            df > ts
+        with pytest.raises(TypeError):
+            df < ts
+        with pytest.raises(TypeError):
+            ts < df
+        with pytest.raises(TypeError):
+            ts > df
+
+        assert not (df == ts).any().any()
+        assert (df != ts).all().all()
+
     def test_df_boolean_comparison_error(self):
         # GH#4576
         # boolean comparisons with a tuple/list give unexpected results
@@ -32,8 +79,8 @@ def test_df_float_none_comparison(self):
         df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
                           columns=['A', 'B', 'C'])
 
-        with pytest.raises(TypeError):
-            df.__eq__(None)
+        result = df.__eq__(None)
+        assert not result.any().any()
 
     def test_df_string_comparison(self):
         df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
@@ -251,3 +298,20 @@ def test_arith_flex_zero_len_raises(self):
 
         with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
             df_len0.sub(df['A'], axis=None, fill_value=3)
+
+
+class TestFrameArithmetic(object):
+    def test_df_bool_mul_int(self):
+        # GH#22047, GH#22163 multiplication by 1 should result in int dtype,
+        # not object dtype
+        df = pd.DataFrame([[False, True], [False, False]])
+        result = df * 1
+
+        # On appveyor this comes back as np.int32 instead of np.int64,
+        # so we check dtype.kind instead of just dtype
+        kinds = result.dtypes.apply(lambda x: x.kind)
+        assert (kinds == 'i').all()
+
+        result = 1 * df
+        kinds = result.dtypes.apply(lambda x: x.kind)
+        assert (kinds == 'i').all()
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
@@ -273,6 +273,8 @@ def test_getitem_boolean(self):
         # test df[df > 0]
         for df in [self.tsframe, self.mixed_frame,
                    self.mixed_float, self.mixed_int]:
+            if compat.PY3 and df is self.mixed_frame:
+                continue
 
             data = df._get_numeric_data()
             bif = df[df > 0]
@@ -2468,8 +2470,11 @@ def test_boolean_indexing_mixed(self):
         assert_frame_equal(df2, expected)
 
         df['foo'] = 'test'
-        with tm.assert_raises_regex(TypeError, 'boolean setting '
-                                    'on mixed-type'):
+        msg = ("boolean setting on mixed-type|"
+               "not supported between|"
+               "unorderable types")
+        with tm.assert_raises_regex(TypeError, msg):
+            # TODO: This message should be the same in PY2/PY3
             df[df > 0.3] = 1
 
     def test_where(self):
@@ -2502,6 +2507,10 @@ def _check_get(df, cond, check_dtypes=True):
         # check getting
         for df in [default_frame, self.mixed_frame,
                    self.mixed_float, self.mixed_int]:
+            if compat.PY3 and df is self.mixed_frame:
+                with pytest.raises(TypeError):
+                    df > 0
+                continue
             cond = df > 0
             _check_get(df, cond)
 
@@ -2549,6 +2558,10 @@ def _check_align(df, cond, other, check_dtypes=True):
                 assert (rs.dtypes == df.dtypes).all()
 
         for df in [self.mixed_frame, self.mixed_float, self.mixed_int]:
+            if compat.PY3 and df is self.mixed_frame:
+                with pytest.raises(TypeError):
+                    df > 0
+                continue
 
             # other is a frame
             cond = (df > 0)[1:]
@@ -2594,6 +2607,10 @@ def _check_set(df, cond, check_dtypes=True):
 
         for df in [default_frame, self.mixed_frame, self.mixed_float,
                    self.mixed_int]:
+            if compat.PY3 and df is self.mixed_frame:
+                with pytest.raises(TypeError):
+                    df > 0
+                continue
 
             cond = df > 0
             _check_set(df, cond)
@@ -2759,9 +2776,14 @@ def test_where_datetime(self):
                             C=np.random.randn(5)))
 
         stamp = datetime(2013, 1, 3)
-        result = df[df > stamp]
+        with pytest.raises(TypeError):
+            df > stamp
+
+        result = df[df.iloc[:, :-1] > stamp]
+
         expected = df.copy()
         expected.loc[[0, 1], 'A'] = np.nan
+        expected.loc[:, 'C'] = np.nan
         assert_frame_equal(result, expected)
 
     def test_where_none(self):

diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
@@ -150,10 +150,15 @@ def test_timestamp_compare(self):
             right_f = getattr(operator, right)
 
             # no nats
-            expected = left_f(df, Timestamp('20010109'))
-            result = right_f(Timestamp('20010109'), df)
-            assert_frame_equal(result, expected)
-
+            if left in ['eq', 'ne']:
+                expected = left_f(df, Timestamp('20010109'))
+                result = right_f(Timestamp('20010109'), df)
+                assert_frame_equal(result, expected)
+            else:
+                with pytest.raises(TypeError):
+                    left_f(df, Timestamp('20010109'))
+                with pytest.raises(TypeError):
+                    right_f(Timestamp('20010109'), df)
             # nats
             expected = left_f(df, Timestamp('nat'))
             result = right_f(Timestamp('nat'), df)

diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py
@@ -473,7 +473,6 @@ def test_timedelta_ops_with_missing_values(self):
         scalar1 = pd.to_timedelta('00:00:01')
         scalar2 = pd.to_timedelta('00:00:02')
         timedelta_NaT = pd.to_timedelta('NaT')
-        NA = np.nan
 
         actual = scalar1 + scalar1
         assert actual == scalar2
@@ -541,10 +540,10 @@ def test_timedelta_ops_with_missing_values(self):
         actual = df1 - timedelta_NaT
         tm.assert_frame_equal(actual, dfn)
 
-        actual = df1 + NA
-        tm.assert_frame_equal(actual, dfn)
-        actual = df1 - NA
-        tm.assert_frame_equal(actual, dfn)
+        with pytest.raises(TypeError):
+            df1 + np.nan
+        with pytest.raises(TypeError):
+            df1 - np.nan
 
         actual = df1 + pd.NaT  # NaT is datetime, not timedelta
         tm.assert_frame_equal(actual, dfn)

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -1235,16 +1235,31 @@ def test_binop_other(self, op, value, dtype):
                 (operator.truediv, 'bool'),
                 (operator.mod, 'i8'),
                 (operator.mod, 'complex128'),
-                (operator.mod, '<M8[ns]'),
-                (operator.mod, '<m8[ns]'),
                 (operator.pow, 'bool')}
         if (op, dtype) in skip:
             pytest.skip("Invalid combination {},{}".format(op, dtype))
+
         e = DummyElement(value, dtype)
         s = pd.DataFrame({"A": [e.value, e.value]}, dtype=e.dtype)
-        result = op(s, e).dtypes
-        expected = op(s, value).dtypes
-        assert_series_equal(result, expected)
+
+        invalid = {(operator.pow, '<M8[ns]'),
+                   (operator.mod, '<M8[ns]'),
+                   (operator.truediv, '<M8[ns]'),
+                   (operator.mul, '<M8[ns]'),
+                   (operator.add, '<M8[ns]'),
+                   (operator.pow, '<m8[ns]'),
+                   (operator.mod, '<m8[ns]'),
+                   (operator.mul, '<m8[ns]')}
+
+        if (op, dtype) in invalid:
+            with pytest.raises(TypeError):
+                result = op(s, e.value)
+        else:
+            # FIXME: Since dispatching to Series, this test no longer
+            # asserts anything meaningful
+            result = op(s, e.value).dtypes
+            expected = op(s, value).dtypes
+            assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize('typestr, holder', [

diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py
@@ -12,7 +12,6 @@
 
 from pandas.core import ops
 from pandas.errors import NullFrequencyError, PerformanceWarning
-from pandas._libs.tslibs import IncompatibleFrequency
 from pandas import (
     timedelta_range,
     Timedelta, Timestamp, NaT, Series, TimedeltaIndex, DatetimeIndex,
@@ -429,15 +428,6 @@ def test_td64arr_add_sub_float(self, box, op, other):
         with pytest.raises(TypeError):
             op(tdi, other)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Tries to cast df to "
-                                                    "Period",
-                                             strict=True,
-                                             raises=IncompatibleFrequency))
-    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('freq', [None, 'H'])
     def test_td64arr_sub_period(self, box, freq):
         # GH#13078
@@ -481,17 +471,12 @@ def test_td64arr_sub_timestamp_raises(self, box):
         idx = TimedeltaIndex(['1 day', '2 day'])
         idx = tm.box_expected(idx, box)
 
-        msg = "cannot subtract a datelike from|Could not operate"
+        msg = ("cannot subtract a datelike from|"
+               "Could not operate|"
+               "cannot perform operation")
         with tm.assert_raises_regex(TypeError, msg):
             idx - Timestamp('2011-01-01')
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64arr_add_timestamp(self, box):
         idx = TimedeltaIndex(['1 day', '2 day'])
         expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
@@ -502,13 +487,6 @@ def test_td64arr_add_timestamp(self, box):
         result = idx + Timestamp('2011-01-01')
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64_radd_timestamp(self, box):
         idx = TimedeltaIndex(['1 day', '2 day'])
         expected = DatetimeIndex(['2011-01-02', '2011-01-03'])
@@ -520,15 +498,6 @@ def test_td64_radd_timestamp(self, box):
         result = Timestamp('2011-01-01') + idx
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object dtype "
-                                                    "instead of "
-                                                    "datetime64[ns]",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64arr_add_sub_timestamp(self, box):
         # GH#11925
         ts = Timestamp('2012-01-01')
@@ -685,7 +654,8 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser):
 
         if box is pd.DataFrame and isinstance(scalar, np.ndarray):
             # raises ValueError
-            pytest.xfail(reason="DataFrame to broadcast incorrectly")
+            pytest.xfail(reason="reversed ops return incorrect answers "
+                                "instead of raising.")
 
         tdser = tm.box_expected(tdser, box)
         err = TypeError
@@ -1151,8 +1121,7 @@ def test_timedelta64_conversions(self, m, unit):
     # Multiplication
     # organized with scalar others first, then array-like
 
-    def test_td64arr_mul_int(self, box_df_fail):
-        box = box_df_fail  # DataFrame op returns object instead of m8[ns]
+    def test_td64arr_mul_int(self, box):
         idx = TimedeltaIndex(np.arange(5, dtype='int64'))
         idx = tm.box_expected(idx, box)
 
@@ -1171,8 +1140,7 @@ def test_td64arr_mul_tdlike_scalar_raises(self, delta, box):
         with pytest.raises(TypeError):
             rng * delta
 
-    def test_tdi_mul_int_array_zerodim(self, box_df_fail):
-        box = box_df_fail  # DataFrame op returns object dtype
+    def test_tdi_mul_int_array_zerodim(self, box):
         rng5 = np.arange(5, dtype='int64')
         idx = TimedeltaIndex(rng5)
         expected = TimedeltaIndex(rng5 * 5)
@@ -1248,24 +1216,22 @@ def test_tdi_rmul_arraylike(self, other, box_df_fail):
     # ------------------------------------------------------------------
     # __div__
 
-    def test_td64arr_div_nat_invalid(self, box_df_fail):
+    def test_td64arr_div_nat_invalid(self, box):
         # don't allow division by NaT (maybe could in the future)
-        box = box_df_fail  # DataFrame returns all-NaT instead of raising
         rng = timedelta_range('1 days', '10 days', name='foo')
         rng = tm.box_expected(rng, box)
         with pytest.raises(TypeError):
             rng / pd.NaT
 
-    def test_td64arr_div_int(self, box_df_fail):
-        box = box_df_fail  # DataFrame returns object dtype instead of m8[ns]
+    def test_td64arr_div_int(self, box):
         idx = TimedeltaIndex(np.arange(5, dtype='int64'))
         idx = tm.box_expected(idx, box)
 
         result = idx / 1
         tm.assert_equal(result, idx)
 
-    def test_tdi_div_tdlike_scalar(self, delta, box_df_fail):
-        box = box_df_fail  # DataFrame op returns m8[ns] instead of float64
+    def test_tdi_div_tdlike_scalar(self, delta, box):
+        # GH#20088, GH#22163 ensure DataFrame returns correct dtype
         rng = timedelta_range('1 days', '10 days', name='foo')
         expected = pd.Float64Index((np.arange(10) + 1) * 12, name='foo')
 
@@ -1275,8 +1241,7 @@ def test_tdi_div_tdlike_scalar(self, delta, box_df_fail):
         result = rng / delta
         tm.assert_equal(result, expected)
 
-    def test_tdi_div_tdlike_scalar_with_nat(self, delta, box_df_fail):
-        box = box_df_fail  # DataFrame op returns m8[ns] instead of float64
+    def test_tdi_div_tdlike_scalar_with_nat(self, delta, box):
         rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo')
         expected = pd.Float64Index([12, np.nan, 24], name='foo')
 
@@ -1289,14 +1254,6 @@ def test_tdi_div_tdlike_scalar_with_nat(self, delta, box_df_fail):
     # ------------------------------------------------------------------
     # __floordiv__, __rfloordiv__
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Incorrectly returns "
-                                                    "m8[ns] instead of f8",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64arr_floordiv_tdscalar(self, box, scalar_td):
         # GH#18831
         td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
@@ -1310,15 +1267,11 @@ def test_td64arr_floordiv_tdscalar(self, box, scalar_td):
         result = td1 // scalar_td
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Incorrectly casts to f8",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
         # GH#18831
+        if box is pd.DataFrame and isinstance(scalar_td, np.timedelta64):
+            pytest.xfail(reason="raises TypeError, not sure why")
+
         td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
         td1.iloc[2] = np.nan
 
@@ -1330,14 +1283,6 @@ def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td):
         result = scalar_td // td1
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns m8[ns] dtype "
-                                                    "instead of f8",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td):
         # GH#18831
         td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
@@ -1353,15 +1298,13 @@ def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td):
         result = td1.__rfloordiv__(scalar_td)
         tm.assert_equal(result, expected)
 
-    def test_td64arr_floordiv_int(self, box_df_fail):
-        box = box_df_fail  # DataFrame returns object dtype
+    def test_td64arr_floordiv_int(self, box):
         idx = TimedeltaIndex(np.arange(5, dtype='int64'))
         idx = tm.box_expected(idx, box)
         result = idx // 1
         tm.assert_equal(result, idx)
 
-    def test_td64arr_floordiv_tdlike_scalar(self, delta, box_df_fail):
-        box = box_df_fail  # DataFrame returns m8[ns] instead of int64 dtype
+    def test_td64arr_floordiv_tdlike_scalar(self, delta, box):
         tdi = timedelta_range('1 days', '10 days', name='foo')
         expected = pd.Int64Index((np.arange(10) + 1) * 12, name='foo')
 
@@ -1377,9 +1320,8 @@ def test_td64arr_floordiv_tdlike_scalar(self, delta, box_df_fail):
         Timedelta('10m7s'),
         Timedelta('10m7s').to_timedelta64()
     ], ids=lambda x: type(x).__name__)
-    def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box_df_fail):
+    def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box):
         # GH#19125
-        box = box_df_fail  # DataFrame op returns m8[ns] instead of f8 dtype
         tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None)
         expected = pd.Index([2.0, 2.0, np.nan])
 
@@ -1398,15 +1340,6 @@ def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box_df_fail):
     # ------------------------------------------------------------------
     # Operations with invalid others
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="__mul__ op treats "
-                                                    "timedelta other as i8; "
-                                                    "rmul OK",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td):
         td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
         td1.iloc[2] = np.nan
@@ -1439,13 +1372,6 @@ def test_td64arr_mul_td64arr_raises(self, box):
     # ------------------------------------------------------------------
     # Operations with numeric others
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object-dtype",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)])
     def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
         # GH#4521
@@ -1470,13 +1396,6 @@ def test_td64arr_mul_numeric_scalar(self, box, one, tdser):
         result = (2 * one) * tdser
         tm.assert_equal(result, expected)
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="Returns object-dtype",
-                                             strict=True))
-    ], ids=lambda x: x.__name__)
     @pytest.mark.parametrize('two', [2, 2.0, np.array(2), np.array(2.0)])
     def test_td64arr_div_numeric_scalar(self, box, two, tdser):
         # GH#4521
@@ -1616,14 +1535,10 @@ def test_float_series_rdiv_td64arr(self, box, names):
 
 class TestTimedeltaArraylikeInvalidArithmeticOps(object):
 
-    @pytest.mark.parametrize('box', [
-        pd.Index,
-        Series,
-        pytest.param(pd.DataFrame,
-                     marks=pytest.mark.xfail(reason="raises ValueError "
-                                                    "instead of TypeError",
-                                             strict=True))
-    ])
+    @pytest.mark.parametrize('scalar_td', [
+        timedelta(minutes=5, seconds=4),
+        Timedelta('5m4s'),
+        Timedelta('5m4s').to_timedelta64()])
     def test_td64arr_pow_invalid(self, scalar_td, box):
         td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
         td1.iloc[2] = np.nan

diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
@@ -390,10 +390,10 @@ def test_bool_ops_raise_on_arithmetic(self):
                 with tm.assert_raises_regex(NotImplementedError, err_msg):
                     f(False, df.a)
 
-                with tm.assert_raises_regex(TypeError, err_msg):
+                with tm.assert_raises_regex(NotImplementedError, err_msg):
                     f(False, df)
 
-                with tm.assert_raises_regex(TypeError, err_msg):
+                with tm.assert_raises_regex(NotImplementedError, err_msg):
                     f(df, True)
 
     def test_bool_ops_warn_on_arithmetic(self):