From ecf3def42d7c17a866bfa2f3a84fe7014ade56e9 Mon Sep 17 00:00:00 2001
From: Dr-Irv <irv@princeton.com>
Date: Mon, 30 Apr 2018 18:00:44 -0400
Subject: [PATCH 1/2] Support operators for ExtensionArray

---
 pandas/core/arrays/base.py                    | 21 ++++
 pandas/core/indexes/base.py                   | 22 +++--
 pandas/core/ops.py                            | 96 ++++++++++++++++++-
 pandas/core/series.py                         | 48 ++++++++--
 pandas/tests/extension/base/getitem.py        | 12 +++
 .../extension/category/test_categorical.py    | 13 +++
 .../tests/extension/decimal/test_decimal.py   | 59 ++++++++++++
 pandas/tests/extension/json/array.py          | 17 ++++
 pandas/tests/extension/json/test_json.py      | 42 ++++++++
 pandas/tests/series/test_operators.py         |  6 +-
 pandas/util/testing.py                        | 13 ++-
 11 files changed, 327 insertions(+), 22 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 1922801c30719..14382f59028b4 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -53,6 +53,13 @@ class ExtensionArray(object):
     * factorize / _values_for_factorize
     * argsort / _values_for_argsort
 
+    For logical operators, the default is to return a boolean Series.
+    However, if the underlying ExtensionDtype overrides the logical
+    operators, then the implementer may want to have an ExtensionArray
+    subclass contain the result.  This can be done by changing the class
+    attribute _logical_result from its default value of None to the
+    _from_sequence method of the ExtensionArray subclass.
+
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise
     ``pandas.errors.AbstractMethodError`` and no ``register`` method is
@@ -567,6 +574,9 @@ def copy(self, deep=False):
         """
         raise AbstractMethodError(self)
 
+    # Result constructor for logical operators; see the class docstring
+    _logical_result = None
+
     # ------------------------------------------------------------------------
     # Block-related methods
     # ------------------------------------------------------------------------
@@ -610,3 +620,14 @@ def _ndarray_values(self):
         used for interacting with our indexers.
         """
         return np.array(self)
+
+    # ------------------------------------------------------------------------
+    # Utilities for use by subclasses
+    # ------------------------------------------------------------------------
+    def is_sequence_of_dtype(self, seq):
+        """
+        Given a sequence, determine whether all members have the appropriate
+        type for this instance of an ExtensionArray
+        """
+        thistype = self.dtype.type
+        return all([isinstance(i, thistype) for i in seq])
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2ceec1592d49b..6f96b78a9dbc6 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3068,13 +3068,23 @@ def get_value(self, series, key):
         # if we have something that is Index-like, then
         # use this, e.g. DatetimeIndex
         s = getattr(series, '_values', None)
-        if isinstance(s, (ExtensionArray, Index)) and is_scalar(key):
-            try:
-                return s[key]
-            except (IndexError, ValueError):
+        if is_scalar(key):
+            if isinstance(s, Index):
+                try:
+                    return s[key]
+                except (IndexError, ValueError):
 
-                # invalid type as an indexer
-                pass
+                    # invalid type as an indexer
+                    pass
+            elif isinstance(s, ExtensionArray):
+                try:
+                    # This should call the ExtensionArray __getitem__
+                    iloc = self.get_loc(key)
+                    return s[iloc]
+                except (IndexError, ValueError):
+
+                    # invalid type as an indexer
+                    pass
 
         s = com._values_from_object(series)
         k = com._values_from_object(key)
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index e14f82906cd06..2939ab1c021b7 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -6,6 +6,7 @@
 # necessary to enforce truediv in Python 2.X
 from __future__ import division
 import operator
+import inspect
 
 import numpy as np
 import pandas as pd
@@ -30,7 +31,7 @@
     is_bool_dtype,
     is_list_like,
     is_scalar,
-    _ensure_object)
+    _ensure_object, is_extension_array_dtype)
 from pandas.core.dtypes.cast import (
     maybe_upcast_putmask, find_common_type,
     construct_1d_object_array_from_listlike)
@@ -990,6 +991,93 @@ def _construct_divmod_result(left, result, index, name, dtype):
     )
 
 
+def dispatch_to_extension_op(left, right, op_name=None, is_logical=False):
+    """
+    Assume that left is a Series backed by an ExtensionArray,
+    apply the operator defined by op_name.
+    """
+
+    method = getattr(left.values, op_name, None)
+    deflen = len(left)
+    excons = type(left.values)._from_sequence
+    exclass = type(left.values)
+    testseq = left.values
+
+    if is_logical:
+        if exclass._logical_result is not None:
+            excons = exclass._logical_result
+        else:
+            excons = None  # Indicates boolean
+
+    # The idea here is as follows.  First we see if the op is
+    # defined in the ExtensionArray subclass, and returns a
+    # result that is not NotImplemented.  If so, we use that
+    # result. If that fails, then we try an
+    # element by element operator, invoking the operator
+    # on each element
+
+    # First see if the extension array object supports the op
+    res = NotImplemented
+    if method is not None and inspect.ismethod(method):
+        rvalues = right
+        if is_extension_array_dtype(right) and isinstance(right, ABCSeries):
+            rvalues = right.values
+        try:
+            res = method(rvalues)
+        except TypeError:
+            pass
+        except Exception as e:
+            raise e
+
+    def convert_values(parm):
+        if is_extension_array_dtype(parm):
+            ovalues = parm.values
+        elif is_list_like(parm):
+            ovalues = parm
+        else:  # Assume it's an object
+            ovalues = [parm] * deflen
+        return ovalues
+
+    if res is NotImplemented:
+        # Try it on each element.  Support operation to another
+        # ExtensionArray, or something that is list like, or
+        # a single object.  This allows a result of an operator
+        # to be an object or any type
+        lvalues = convert_values(left)
+        rvalues = convert_values(right)
+
+        # Get the method for each object.
+        def callfunc(a, b):
+            f = getattr(a, op_name, None)
+            if f is not None:
+                return f(b)
+            else:
+                return NotImplemented
+        res = [callfunc(a, b) for (a, b) in zip(lvalues, rvalues)]
+
+        # We can't use (NotImplemented in res) because the
+        # results might be objects that have overridden __eq__
+        if any([isinstance(r, type(NotImplemented)) for r in res]):
+            msg = "invalid operation {opn} between {one} and {two}"
+            raise TypeError(msg.format(opn=op_name,
+                                       one=type(lvalues),
+                                       two=type(rvalues)))
+
+    # At this point we have the result
+    # always return a full value series here
+    res_values = com._values_from_object(res)
+    if excons is not None:
+        if testseq.is_sequence_of_dtype(res_values):
+            # Convert to the ExtensionArray type if each result is of that
+            # type.  If _logical_result was not None, this will then use
+            # the function set there to return an appropriate result
+            res_values = excons(res_values)
+
+    res_name = get_op_result_name(left, right)
+    return left._constructor(res_values, index=left.index,
+                             name=res_name)
+
+
 def _arith_method_SERIES(cls, op, special):
     """
     Wrapper function for Series arithmetic operations, to avoid
@@ -1058,6 +1146,9 @@ def wrapper(left, right):
             raise TypeError("{typ} cannot perform the operation "
                             "{op}".format(typ=type(left).__name__, op=str_rep))
 
+        elif is_extension_array_dtype(left):
+            return dispatch_to_extension_op(left, right, op_name)
+
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
@@ -1208,6 +1299,9 @@ def wrapper(self, other, axis=None):
             return self._constructor(res_values, index=self.index,
                                      name=res_name)
 
+        elif is_extension_array_dtype(self):
+            return dispatch_to_extension_op(self, other, op_name, True)
+
         elif isinstance(other, ABCSeries):
             # By this point we have checked that self._indexed_same(other)
             res_values = na_op(self.values, other.values)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a14f3299e11e9..3ef431ed39761 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2174,10 +2174,26 @@ def _binop(self, other, func, level=None, fill_value=None):
 
         this_vals, other_vals = ops.fill_binop(this.values, other.values,
                                                fill_value)
-
-        with np.errstate(all='ignore'):
-            result = func(this_vals, other_vals)
         name = ops.get_op_result_name(self, other)
+
+        if is_extension_array_dtype(this) or is_extension_array_dtype(other):
+            try:
+                result = func(this_vals, other_vals)
+            except TypeError:
+                result = NotImplemented
+            except Exception as e:
+                raise e
+
+            if result is NotImplemented:
+                result = [func(a, b) for a, b in zip(this_vals, other_vals)]
+                if is_extension_array_dtype(this):
+                    excons = type(this_vals)._from_sequence
+                else:
+                    excons = type(other_vals)._from_sequence
+                result = excons(result)
+        else:
+            with np.errstate(all='ignore'):
+                result = func(this_vals, other_vals)
         result = self._constructor(result, index=new_index, name=name)
         result = result.__finalize__(self)
         if name is None:
@@ -2185,7 +2201,7 @@ def _binop(self, other, func, level=None, fill_value=None):
             result.name = None
         return result
 
-    def combine(self, other, func, fill_value=np.nan):
+    def combine(self, other, func, fill_value=None):
         """
         Perform elementwise binary operation on two Series using given function
         with optional fill value when an index is missing from one Series or
@@ -2197,6 +2213,9 @@ def combine(self, other, func, fill_value=np.nan):
         func : function
             Function that takes two scalars as inputs and return a scalar
         fill_value : scalar value
+            Defaults to np.nan unless self is backed by an
+            ExtensionArray, in which case the na_value of its
+            dtype is used.
 
         Returns
         -------
@@ -2216,20 +2235,33 @@ def combine(self, other, func, fill_value=np.nan):
         Series.combine_first : Combine Series values, choosing the calling
             Series's values first
         """
+        self_is_ext = is_extension_array_dtype(self)
+        if fill_value is None:
+            if self_is_ext:
+                fill_value = self.dtype.na_value
+            else:
+                fill_value = np.nan
         if isinstance(other, Series):
             new_index = self.index.union(other.index)
             new_name = ops.get_op_result_name(self, other)
-            new_values = np.empty(len(new_index), dtype=self.dtype)
+            new_values = []
             for i, idx in enumerate(new_index):
                 lv = self.get(idx, fill_value)
                 rv = other.get(idx, fill_value)
                 with np.errstate(all='ignore'):
-                    new_values[i] = func(lv, rv)
+                    new_values.append(func(lv, rv))
         else:
             new_index = self.index
-            with np.errstate(all='ignore'):
-                new_values = func(self._values, other)
+            if not self_is_ext:
+                with np.errstate(all='ignore'):
+                    new_values = func(self._values, other)
+            else:
+                new_values = [func(lv, other) for lv in self._values]
             new_name = self.name
+
+        if (self_is_ext and self.values.is_sequence_of_dtype(new_values)):
+            new_values = self._values._from_sequence(new_values)
+
         return self._constructor(new_values, index=new_index, name=new_name)
 
     def combine_first(self, other):
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
index 5c9ede1079079..238ab81e009df 100644
--- a/pandas/tests/extension/base/getitem.py
+++ b/pandas/tests/extension/base/getitem.py
@@ -117,6 +117,18 @@ def test_getitem_slice(self, data):
         result = data[slice(1)]  # scalar
         assert isinstance(result, type(data))
 
+    def test_get(self, data):
+        # GH 20882
+        s = pd.Series(data, index=[2 * i for i in range(len(data))])
+        assert s.get(4) == s.iloc[2]
+
+        result = s.get([4, 6])
+        expected = s.iloc[[2, 3]]
+        self.assert_series_equal(result, expected)
+
+        s = pd.Series(data[:6], index=list('abcdef'))
+        assert s.get('c') == s.iloc[2]
+
     def test_take_sequence(self, data):
         result = pd.Series(data)[[0, 1, 3]]
         assert result.iloc[0] == data[0]
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
index 530a4e7a22a7a..3e9a97cfae402 100644
--- a/pandas/tests/extension/category/test_categorical.py
+++ b/pandas/tests/extension/category/test_categorical.py
@@ -2,6 +2,9 @@
 
 import pytest
 import numpy as np
+import pandas as pd
+
+import pandas.util.testing as tm
 
 from pandas.api.types import CategoricalDtype
 from pandas import Categorical
@@ -157,3 +160,13 @@ def test_value_counts(self, all_data, dropna):
 
 class TestCasting(base.BaseCastingTests):
     pass
+
+
+def test_combine():
+    orig_data1 = make_data()
+    orig_data2 = make_data()
+    s1 = pd.Series(Categorical(orig_data1, ordered=True))
+    s2 = pd.Series(Categorical(orig_data2, ordered=True))
+    result = s1.combine(s2, lambda x1, x2: x1 <= x2)
+    expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)])
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 1f8cf0264f62f..32690595bcd2b 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -7,6 +7,9 @@
 
 from pandas.tests.extension import base
 
+from pandas.tests.series.test_operators import TestSeriesOperators
+from pandas.util._decorators import cache_readonly
+
 from .array import DecimalDtype, DecimalArray, make_data
 
 
@@ -183,3 +186,59 @@ def test_dataframe_constructor_with_different_dtype_raises():
     xpr = "Cannot coerce extension array to dtype 'int64'. "
     with tm.assert_raises_regex(ValueError, xpr):
         pd.DataFrame({"A": arr}, dtype='int64')
+
+
+def test_addition(data):
+    s = pd.Series(data)
+    result = s + 10
+    expected = pd.Series(DecimalArray([i + 10 for i in data]))
+    tm.assert_series_equal(result, expected)
+
+    result = 10 + s
+    tm.assert_series_equal(result, expected)
+
+    result = s + s
+    expected = pd.Series(DecimalArray([i + i for i in data]))
+    tm.assert_series_equal(result, expected)
+
+    result = s + list(data)
+    tm.assert_series_equal(result, expected)
+
+    result = list(data) + s
+    tm.assert_series_equal(result, expected)
+
+    result = (s <= 10)
+    expected = pd.Series([i <= 10 for i in data])
+    tm.assert_series_equal(result, expected)
+
+_ts = pd.Series(DecimalArray(make_data()))
+
+
+class TestOperator(BaseDecimal, TestSeriesOperators):
+    @cache_readonly
+    def ts(self):
+        ts = _ts.copy()
+        ts.name = 'ts'
+        return ts
+
+    def test_operators(self):
+        def absfunc(v):
+            if isinstance(v, pd.Series):
+                vals = v.values
+                return pd.Series(vals._from_sequence([abs(i) for i in vals]))
+            else:
+                return abs(v)
+        context = decimal.getcontext()
+        divbyzerotrap = context.traps[decimal.DivisionByZero]
+        invalidoptrap = context.traps[decimal.InvalidOperation]
+        context.traps[decimal.DivisionByZero] = 0
+        context.traps[decimal.InvalidOperation] = 0
+        super(TestOperator, self).test_operators(absfunc)
+        context.traps[decimal.DivisionByZero] = divbyzerotrap
+        context.traps[decimal.InvalidOperation] = invalidoptrap
+
+    def test_operators_corner(self):
+        pytest.skip("Cannot add empty Series of float64 to DecimalArray")
+
+    def test_divmod(self):
+        pytest.skip("divmod not appropriate for Decimal type")
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 88bb66f38b35c..334336700e79d 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -169,6 +169,23 @@ def _values_for_argsort(self):
         frozen = [()] + list(tuple(x.items()) for x in self)
         return np.array(frozen, dtype=object)[1:]
 
+    def __add__(self, other):
+        def merge_two_dicts(x, y):
+            z = x.copy()
+            z.update(y)
+            return z
+
+        if isinstance(other, type(self)):
+            seq = [merge_two_dicts(a, b)
+                   for (a, b) in zip(self.data, other.data)]
+        elif isinstance(other, self.dtype.type):
+            seq = [merge_two_dicts(a, other)
+                   for a in self.data]
+        else:
+            raise TypeError("Cannot add JSONArray and type ", type(other))
+
+        return self._from_sequence(seq)
+
 
 def make_data():
     # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index b7ac8033f3f6d..3987fd7004730 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -230,3 +230,45 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
         super(TestGroupby, self).test_groupby_extension_agg(
             as_index, data_for_grouping
         )
+
+
+class TestSomeOps(BaseJSON):
+    def test_some_ops(self):
+        # Just testing that addition through the JSONArray works
+        # and then subtraction raises a TypeError
+        d1 = make_data()
+        d2 = make_data()
+        s1 = pd.Series(JSONArray(d1))
+        s2 = pd.Series(JSONArray(d2))
+        result = s1 + s2
+
+        def merge_two_dicts(x, y):
+            z = x.copy()
+            z.update(y)
+            return z
+
+        expected = pd.Series(JSONArray([merge_two_dicts(a, b)
+                                        for (a, b) in zip(d1, d2)]))
+        self.assert_series_equal(result, expected)
+
+        toadd = s2.iloc[5]
+        result = s1 + toadd
+        expected = pd.Series(JSONArray([merge_two_dicts(a, toadd)
+                                        for a in d1]))
+        self.assert_series_equal(result, expected)
+
+        with pytest.raises(TypeError):
+            # __add__ is implemented, but __radd__ is not
+            result = toadd + s1
+
+        with pytest.raises(TypeError):
+            # Cannot add a constant
+            result = s1 + 29
+
+        with pytest.raises(TypeError):
+            # __sub__ is not implemented
+            result = s1 - toadd
+
+        with pytest.raises(TypeError):
+            # __sub__ is not implemented
+            result = s1 - s2
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index f90fcce973f00..8e27b4a2e421d 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -1198,11 +1198,11 @@ def test_neg(self):
     def test_invert(self):
         assert_series_equal(-(self.series < 0), ~(self.series < 0))
 
-    def test_operators(self):
+    def test_operators(self, absfunc=np.abs):
         def _check_op(series, other, op, pos_only=False,
                       check_dtype=True):
-            left = np.abs(series) if pos_only else series
-            right = np.abs(other) if pos_only else other
+            left = absfunc(series) if pos_only else series
+            right = absfunc(other) if pos_only else other
 
             cython_or_numpy = op(left, right)
             python = left.combine(right, op)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index e1484a9c1b390..1a7fc7940f56e 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -30,7 +30,7 @@
     is_categorical_dtype,
     is_interval_dtype,
     is_sequence,
-    is_list_like)
+    is_list_like, is_extension_array_dtype)
 from pandas.io.formats.printing import pprint_thing
 from pandas.core.algorithms import take_1d
 import pandas.core.common as com
@@ -1113,10 +1113,12 @@ def assert_extension_array_equal(left, right):
     right_na = right.isna()
     assert_numpy_array_equal(left_na, right_na)
 
-    left_valid = left[~left_na].astype(object)
-    right_valid = right[~right_na].astype(object)
+    if len(left_na) > 0 and len(right_na) > 0:
 
-    assert_numpy_array_equal(left_valid, right_valid)
+        left_valid = left[~left_na].astype(object)
+        right_valid = right[~right_na].astype(object)
+
+        assert_numpy_array_equal(left_valid, right_valid)
 
 
 # This could be refactored to use the NDFrame.equals method
@@ -1219,6 +1221,9 @@ def assert_series_equal(left, right, check_dtype=True,
         left = pd.IntervalIndex(left)
         right = pd.IntervalIndex(right)
         assert_index_equal(left, right, obj='{obj}.index'.format(obj=obj))
+    elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and
+          is_extension_array_dtype(right) and not is_categorical_dtype(right)):
+        return assert_extension_array_equal(left.values, right.values)
 
     else:
         _testing.assert_almost_equal(left.get_values(), right.get_values(),

From 40ac877c99e67ed4ecf1afbcb8c0e3b1568dc14b Mon Sep 17 00:00:00 2001
From: Dr-Irv <irv@princeton.com>
Date: Tue, 1 May 2018 12:18:00 -0400
Subject: [PATCH 2/2] fix lint issues

---
 pandas/core/arrays/base.py                     | 2 +-
 pandas/core/ops.py                             | 2 +-
 pandas/tests/extension/decimal/test_decimal.py | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 14382f59028b4..e65d02bdbcf14 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -630,4 +630,4 @@ def is_sequence_of_dtype(self, seq):
         type for this instance of an ExtensionArray
         """
         thistype = self.dtype.type
-        return all([isinstance(i, thistype) for i in seq])
+        return all(isinstance(i, thistype) for i in seq)
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 2939ab1c021b7..294c591de6959 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1057,7 +1057,7 @@ def callfunc(a, b):
 
         # We can't use (NotImplemented in res) because the
         # results might be objects that have overridden __eq__
-        if any([isinstance(r, type(NotImplemented)) for r in res]):
+        if any(isinstance(r, type(NotImplemented)) for r in res):
             msg = "invalid operation {opn} between {one} and {two}"
             raise TypeError(msg.format(opn=op_name,
                                        one=type(lvalues),
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 32690595bcd2b..06da37300d24f 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -211,6 +211,7 @@ def test_addition(data):
     expected = pd.Series([i <= 10 for i in data])
     tm.assert_series_equal(result, expected)
 
+
 _ts = pd.Series(DecimalArray(make_data()))