diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7153f5c2e7007..fcd1e3db46f3a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4935,6 +4935,33 @@ def reorder_levels(self, order, axis=0): # ---------------------------------------------------------------------- # Arithmetic / combination related + def _wrap_dispatched_op(self, result, other, func, axis=None): + """ + Wrap the result of an arithmetic/comparison operation performed + via ops.dispatch_to_series in a properly-indexed DataFrame. + + Parameters + ---------- + result : dict[int:Series] + other : DataFrame, Series, or scalar + func : binary operator + axis : {0, 1, "index", "columns"} + + Returns + ------- + DataFrame + + Notes + ----- + The `other`, `func`, and `axis` arguments are not used here but are + included to keep the signature consistent with that in SparseDataFrame. + """ + result = self._constructor(result, index=self.index, copy=False) + # Pin columns instead of passing to constructor for compat with + # non-unique columns case + result.columns = self.columns + return result + def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join='outer', level=level, copy=False) new_index, new_columns = this.index, this.columns diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fbfdfb9c01237..3f299d1585ce7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -792,7 +792,7 @@ def fill_binop(left, right, fill_value): return left, right -def mask_cmp_op(x, y, op, allowed_types): +def mask_cmp_op(x, y, op): """ Apply the function `op` to only non-null points in x and y. @@ -801,16 +801,15 @@ def mask_cmp_op(x, y, op, allowed_types): x : array-like y : array-like op : binary operation - allowed_types : class or tuple of classes Returns ------- result : ndarray[bool] """ - # TODO: Can we make the allowed_types arg unnecessary? + # TODO: can we do this without casting to list? xrav = x.ravel() result = np.empty(x.size, dtype=bool) - if isinstance(y, allowed_types): + if isinstance(y, (np.ndarray, ABCSeries)): yrav = y.ravel() mask = notna(xrav) & notna(yrav) result[mask] = op(np.array(list(xrav[mask])), @@ -1006,12 +1005,7 @@ def column_op(a, b): raise NotImplementedError(right) new_data = expressions.evaluate(column_op, str_rep, left, right) - - result = left._constructor(new_data, index=left.index, copy=False) - # Pin columns instead of passing to constructor for compat with - # non-unique columns case - result.columns = left.columns - return result + return left._wrap_dispatched_op(new_data, right, func, axis=axis) def dispatch_to_index_op(op, left, right, index_class): @@ -1896,7 +1890,7 @@ def na_op(x, y): with np.errstate(invalid='ignore'): result = op(x, y) except TypeError: - result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) + result = mask_cmp_op(x, y, op) return result @Appender('Wrapper for flexible comparison methods {name}' @@ -1948,7 +1942,7 @@ def f(self, other): else: # straight boolean comparisons we want to allow all columns - # (regardless of dtype to pass thru) See #4537 for discussion. + # (regardless of dtype to pass thru) See GH#4537 for discussion. res = self._combine_const(other, func) return res.fillna(True).astype(bool) @@ -1986,7 +1980,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y) except TypeError: - result = mask_cmp_op(x, y, op, np.ndarray) + result = mask_cmp_op(x, y, op) return result @Appender('Wrapper for comparison method {name}'.format(name=op_name)) diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index ee7de49bc1bce..7ebedd02a5ff3 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -339,9 +339,10 @@ def _apply_columns(self, func): for col, series in compat.iteritems(self): new_data[col] = func(series) - return self._constructor( - data=new_data, index=self.index, columns=self.columns, - default_fill_value=self.default_fill_value).__finalize__(self) + # pass dummy arguments for func and axis; `other` just needs to be + # a scalar. + return self._wrap_dispatched_op(new_data, other=None, + func=None, axis=None) def astype(self, dtype): return self._apply_columns(lambda x: x.astype(dtype)) @@ -547,6 +548,31 @@ def xs(self, key, axis=0, copy=False): # ---------------------------------------------------------------------- # Arithmetic-related methods + def _wrap_dispatched_op(self, result, other, func, axis=None): + """ + Wrap the result of an arithmetic/comparison operation performed + via ops.dispatch_to_series in a properly-indexed DataFrame. + + Parameters + ---------- + result : dict[int:Series] + other : DataFrame, Series, scalar + func : binary function + axis : {None, "columns"} + + Returns + ------- + SparseDataFrame + """ + + fill_value = self._get_op_result_fill_value(other, func, axis) + + res = self._constructor(result, index=self.index, + columns=self.columns, + default_fill_value=fill_value) + + return res.__finalize__(self) + def _combine_frame(self, other, func, fill_value=None, level=None): if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -557,31 +583,22 @@ def _combine_frame(self, other, func, fill_value=None, level=None): if self.empty and other.empty: return self._constructor(index=new_index).__finalize__(self) - new_data = {} - if fill_value is not None: - # TODO: be a bit more intelligent here - for col in new_columns: - if col in this and col in other: - dleft = this[col].to_dense() - dright = other[col].to_dense() - result = dleft._binop(dright, func, fill_value=fill_value) - result = result.to_sparse(fill_value=this[col].fill_value) - new_data[col] = result - else: + def _arith_op(left, right): + # analogous to _arith_op defined in DataFrame._combine_frame + dleft = left.to_dense() + dright = right.to_dense() + result = dleft._binop(dright, func, fill_value=fill_value) + result = result.to_sparse(fill_value=left.fill_value) + return result - for col in new_columns: - if col in this and col in other: - new_data[col] = func(this[col], other[col]) + assert all(col in this and col in other for col in new_columns) - new_fill_value = self._get_op_result_fill_value(other, func) + new_data = {col: _arith_op(this[col], other[col]) + for col in new_columns} - return self._constructor(data=new_data, index=new_index, - columns=new_columns, - default_fill_value=new_fill_value - ).__finalize__(self) + return this._wrap_dispatched_op(new_data, other, func, axis=None) def _combine_match_index(self, other, func, level=None): - new_data = {} if level is not None: raise NotImplementedError("'level' argument is not supported") @@ -589,14 +606,10 @@ def _combine_match_index(self, other, func, level=None): this, other = self.align(other, join='outer', axis=0, level=level, copy=False) - for col, series in compat.iteritems(this): - new_data[col] = func(series.values, other.values) + new_data = {col: func(this[col].values, other.values) + for col in this.columns} - fill_value = self._get_op_result_fill_value(other, func) - - return self._constructor( - new_data, index=this.index, columns=self.columns, - default_fill_value=fill_value).__finalize__(self) + return self._wrap_dispatched_op(new_data, other, func, axis=None) def _combine_match_columns(self, other, func, level=None): # patched version of DataFrame._combine_match_columns to account for @@ -611,29 +624,33 @@ def _combine_match_columns(self, other, func, level=None): copy=False) assert left.columns.equals(right.index) - new_data = {} - - for col in left.columns: - new_data[col] = func(left[col], float(right[col])) + new_data = {col: func(left[col], float(right[col])) + for col in left.columns} - return self._constructor( - new_data, index=left.index, columns=left.columns, - default_fill_value=self.default_fill_value).__finalize__(self) + return self._wrap_dispatched_op(new_data, other, func, "columns") def _combine_const(self, other, func): - return self._apply_columns(lambda x: func(x, other)) + new_data = {col: func(self[col], other) for col in self.columns} + + return self._wrap_dispatched_op(new_data, other, func, axis=None) - def _get_op_result_fill_value(self, other, func): + def _get_op_result_fill_value(self, other, func, axis=None): own_default = self.default_fill_value - if isinstance(other, DataFrame): + axis = self._get_axis_name(axis) if axis is not None else None + + if axis == "columns": + # i.e. called via _combine_match_columns + fill_value = self.default_fill_value + + elif isinstance(other, DataFrame): # i.e. called from _combine_frame other_default = getattr(other, 'default_fill_value', np.nan) # if the fill values are the same use them? or use a valid one if own_default == other_default: - # TOOD: won't this evaluate as False if both are np.nan? + # TODO: won't this evaluate as False if both are np.nan? fill_value = own_default elif np.isnan(own_default) and not np.isnan(other_default): fill_value = other_default @@ -652,6 +669,10 @@ def _get_op_result_fill_value(self, other, func): fill_value = func(np.float64(own_default), np.float64(other.fill_value)) + elif np.ndim(other) == 0: + # i.e. called via _combine_const + fill_value = self.default_fill_value + else: raise NotImplementedError(type(other))