Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f101e66

Browse files
committedAug 3, 2016
ENH: add sparse op for other dtypes
1 parent 97de42a commit f101e66

File tree

13 files changed

+6336
-580
lines changed

13 files changed

+6336
-580
lines changed
 

‎doc/source/whatsnew/v0.19.0.txt

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,29 @@ Google BigQuery Enhancements
307307
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
308308
- The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs <io.bigquery_reader>` for more details (:issue:`13615`).
309309

310+
.. _whatsnew_0190.sparse:
311+
312+
Sparse changes
313+
~~~~~~~~~~~~~~
314+
315+
These changes allow pandas to handle sparse data with more dtypes, and make for a smoother experience with data handling.
316+
317+
- Sparse data structures can now preserve ``dtype`` after arithmetic ops (:issue:`13848`)
318+
319+
.. ipython:: python
320+
321+
s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64)
322+
s.dtype
323+
324+
s + 1
325+
326+
327+
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
328+
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
329+
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
330+
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`)
331+
- Bug in ``SparseDataFrame`` where the ``dtype`` and ``fill_value`` of a passed ``SparseArray`` or ``SparseSeries`` were not respected (:issue:`13866`)
332+
310333
.. _whatsnew_0190.enhancements.other:
311334

312335
Other enhancements
@@ -754,11 +777,6 @@ Bug Fixes
754777
- Bug in ``groupby().shift()``, which could cause a segfault or corruption in rare circumstances when grouping by columns with missing values (:issue:`13813`)
755778
- Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
756779
- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
757-
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
758-
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
759-
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
760-
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`)
761-
- Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`)
762780
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
763781
- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`)
764782
- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)

‎pandas/sparse/array.py

Lines changed: 72 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,14 @@ def wrapper(self, other):
4848
raise AssertionError("length mismatch: %d vs. %d" %
4949
(len(self), len(other)))
5050
if not isinstance(other, ABCSparseArray):
51-
other = SparseArray(other, fill_value=self.fill_value)
52-
if name[0] == 'r':
53-
return _sparse_array_op(other, self, op, name[1:])
54-
else:
55-
return _sparse_array_op(self, other, op, name)
51+
dtype = getattr(other, 'dtype', None)
52+
other = SparseArray(other, fill_value=self.fill_value,
53+
dtype=dtype)
54+
return _sparse_array_op(self, other, op, name)
5655
elif is_scalar(other):
57-
new_fill_value = op(np.float64(self.fill_value), np.float64(other))
58-
56+
fill = op(_get_fill(self), np.asarray(other))
5957
return _wrap_result(name, op(self.sp_values, other),
60-
self.sp_index, new_fill_value)
58+
self.sp_index, fill)
6159
else: # pragma: no cover
6260
raise TypeError('operation with %s not supported' % type(other))
6361

@@ -67,33 +65,74 @@ def wrapper(self, other):
6765
return wrapper
6866

6967

70-
def _sparse_array_op(left, right, op, name):
71-
if left.sp_index.equals(right.sp_index):
72-
result = op(left.sp_values, right.sp_values)
73-
result_index = left.sp_index
68+
def _maybe_match_dtype(left, right):
69+
if not hasattr(right, 'dtype'):
70+
return left.dtype
71+
elif left.dtype == right.dtype:
72+
return getattr(left.dtype, '__name__', left.dtype)
7473
else:
75-
sparse_op = getattr(splib, 'sparse_%s' % name)
76-
result, result_index = sparse_op(left.sp_values, left.sp_index,
77-
left.fill_value, right.sp_values,
78-
right.sp_index, right.fill_value)
74+
# ToDo: to be supported after GH 667
75+
raise NotImplementedError('dtypes must be identical')
76+
77+
78+
def _get_fill(arr):
79+
# coerce fill_value to arr dtype if possible
80+
# int64 SparseArray can have NaN as fill_value if there are no missing values
7981
try:
80-
fill_value = op(left.fill_value, right.fill_value)
81-
except:
82-
fill_value = nan
83-
return _wrap_result(name, result, result_index, fill_value)
82+
return np.asarray(arr.fill_value, dtype=arr.dtype)
83+
except ValueError:
84+
return np.asarray(arr.fill_value)
8485

8586

86-
def _wrap_result(name, data, sparse_index, fill_value):
87+
def _sparse_array_op(left, right, op, name, series=False):
88+
89+
if series and is_integer_dtype(left) and is_integer_dtype(right):
90+
# series coerces to float64 if result should have NaN/inf
91+
if name in ('floordiv', 'mod') and (right.values == 0).any():
92+
left = left.astype(np.float64)
93+
right = right.astype(np.float64)
94+
elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
95+
left = left.astype(np.float64)
96+
right = right.astype(np.float64)
97+
98+
dtype = _maybe_match_dtype(left, right)
99+
100+
if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
101+
result = op(left.get_values(), right.get_values())
102+
103+
if left.sp_index.ngaps == 0:
104+
index = left.sp_index
105+
else:
106+
index = right.sp_index
107+
fill = op(_get_fill(left), _get_fill(right))
108+
elif left.sp_index.equals(right.sp_index):
109+
result = op(left.sp_values, right.sp_values)
110+
index = left.sp_index
111+
fill = op(_get_fill(left), _get_fill(right))
112+
else:
113+
if name[0] == 'r':
114+
left, right = right, left
115+
name = name[1:]
116+
117+
opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
118+
sparse_op = getattr(splib, opname)
119+
120+
result, index, fill = sparse_op(left.sp_values, left.sp_index,
121+
left.fill_value, right.sp_values,
122+
right.sp_index, right.fill_value)
123+
return _wrap_result(name, result, index, fill, dtype=result.dtype)
124+
125+
126+
def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
87127
""" wrap op result to have correct dtype """
88128
if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
89129
# ToDo: We can remove this condition when removing
90130
# SparseArray's dtype default when closing GH 667
91-
return SparseArray(data, sparse_index=sparse_index,
92-
fill_value=fill_value,
93-
dtype=np.bool)
94-
else:
95-
return SparseArray(data, sparse_index=sparse_index,
96-
fill_value=fill_value)
131+
dtype = np.bool
132+
elif name == 'truediv':
133+
dtype = np.float64
134+
return SparseArray(data, sparse_index=sparse_index,
135+
fill_value=fill_value, dtype=dtype)
97136

98137

99138
class SparseArray(PandasObject, np.ndarray):
@@ -419,7 +458,12 @@ def astype(self, dtype=None):
419458
dtype = np.dtype(dtype)
420459
if dtype is not None and dtype not in (np.float_, float):
421460
raise TypeError('Can only support floating point data for now')
422-
return self.copy()
461+
462+
if self.dtype == dtype:
463+
return self.copy()
464+
else:
465+
return self._simple_new(self.sp_values.astype(dtype),
466+
self.sp_index, float(self.fill_value))
423467

424468
def copy(self, deep=True):
425469
"""

‎pandas/sparse/series.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,9 @@ def wrapper(self, other):
5757
elif isinstance(other, DataFrame):
5858
return NotImplemented
5959
elif is_scalar(other):
60-
if isnull(other) or isnull(self.fill_value):
61-
new_fill_value = np.nan
62-
else:
63-
new_fill_value = op(np.float64(self.fill_value),
64-
np.float64(other))
65-
66-
return self._constructor(op(self.sp_values, other),
60+
new_values = op(self.values, other)
61+
return self._constructor(new_values,
6762
index=self.index,
68-
sparse_index=self.sp_index,
69-
fill_value=new_fill_value,
7063
name=self.name)
7164
else: # pragma: no cover
7265
raise TypeError('operation with %s not supported' % type(other))
@@ -84,7 +77,8 @@ def _sparse_series_op(left, right, op, name):
8477
new_index = left.index
8578
new_name = _maybe_match_name(left, right)
8679

87-
result = _sparse_array_op(left, right, op, name)
80+
result = _sparse_array_op(left.values, right.values, op, name,
81+
series=True)
8882
return left._constructor(result, index=new_index, name=new_name)
8983

9084

Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pandas.util.testing as tm
4+
5+
6+
class TestSparseArrayArithmetics(tm.TestCase):
7+
8+
_multiprocess_can_split_ = True
9+
10+
_base = np.array
11+
_klass = pd.SparseArray
12+
13+
def _assert(self, a, b):
14+
tm.assert_numpy_array_equal(a, b)
15+
16+
def _check_numeric_ops(self, a, b, a_dense, b_dense):
17+
# sparse & sparse
18+
self._assert((a + b).to_dense(), a_dense + b_dense)
19+
self._assert((b + a).to_dense(), b_dense + a_dense)
20+
21+
self._assert((a - b).to_dense(), a_dense - b_dense)
22+
self._assert((b - a).to_dense(), b_dense - a_dense)
23+
24+
self._assert((a * b).to_dense(), a_dense * b_dense)
25+
self._assert((b * a).to_dense(), b_dense * a_dense)
26+
27+
# pandas uses future division
28+
self._assert((a / b).to_dense(), a_dense * 1.0 / b_dense)
29+
self._assert((b / a).to_dense(), b_dense * 1.0 / a_dense)
30+
31+
# ToDo: FIXME in GH 13843
32+
if not (self._base == pd.Series and a.dtype == 'int64'):
33+
self._assert((a // b).to_dense(), a_dense // b_dense)
34+
self._assert((b // a).to_dense(), b_dense // a_dense)
35+
36+
self._assert((a % b).to_dense(), a_dense % b_dense)
37+
self._assert((b % a).to_dense(), b_dense % a_dense)
38+
39+
self._assert((a ** b).to_dense(), a_dense ** b_dense)
40+
self._assert((b ** a).to_dense(), b_dense ** a_dense)
41+
42+
# sparse & dense
43+
self._assert((a + b_dense).to_dense(), a_dense + b_dense)
44+
self._assert((b_dense + a).to_dense(), b_dense + a_dense)
45+
46+
self._assert((a - b_dense).to_dense(), a_dense - b_dense)
47+
self._assert((b_dense - a).to_dense(), b_dense - a_dense)
48+
49+
self._assert((a * b_dense).to_dense(), a_dense * b_dense)
50+
self._assert((b_dense * a).to_dense(), b_dense * a_dense)
51+
52+
# pandas uses future division
53+
self._assert((a / b_dense).to_dense(), a_dense * 1.0 / b_dense)
54+
self._assert((b_dense / a).to_dense(), b_dense * 1.0 / a_dense)
55+
56+
# ToDo: FIXME in GH 13843
57+
if not (self._base == pd.Series and a.dtype == 'int64'):
58+
self._assert((a // b_dense).to_dense(), a_dense // b_dense)
59+
self._assert((b_dense // a).to_dense(), b_dense // a_dense)
60+
61+
self._assert((a % b_dense).to_dense(), a_dense % b_dense)
62+
self._assert((b_dense % a).to_dense(), b_dense % a_dense)
63+
64+
self._assert((a ** b_dense).to_dense(), a_dense ** b_dense)
65+
self._assert((b_dense ** a).to_dense(), b_dense ** a_dense)
66+
67+
def _check_bool_result(self, res):
68+
tm.assertIsInstance(res, self._klass)
69+
self.assertEqual(res.dtype, np.bool)
70+
self.assertIsInstance(res.fill_value, bool)
71+
72+
def _check_comparison_ops(self, a, b, a_dense, b_dense):
73+
# sparse & sparse
74+
self._check_bool_result(a == b)
75+
self._assert((a == b).to_dense(), a_dense == b_dense)
76+
77+
self._check_bool_result(a != b)
78+
self._assert((a != b).to_dense(), a_dense != b_dense)
79+
80+
self._check_bool_result(a >= b)
81+
self._assert((a >= b).to_dense(), a_dense >= b_dense)
82+
83+
self._check_bool_result(a <= b)
84+
self._assert((a <= b).to_dense(), a_dense <= b_dense)
85+
86+
self._check_bool_result(a > b)
87+
self._assert((a > b).to_dense(), a_dense > b_dense)
88+
89+
self._check_bool_result(a < b)
90+
self._assert((a < b).to_dense(), a_dense < b_dense)
91+
92+
# sparse & dense
93+
self._check_bool_result(a == b_dense)
94+
self._assert((a == b_dense).to_dense(), a_dense == b_dense)
95+
96+
self._check_bool_result(a != b_dense)
97+
self._assert((a != b_dense).to_dense(), a_dense != b_dense)
98+
99+
self._check_bool_result(a >= b_dense)
100+
self._assert((a >= b_dense).to_dense(), a_dense >= b_dense)
101+
102+
self._check_bool_result(a <= b_dense)
103+
self._assert((a <= b_dense).to_dense(), a_dense <= b_dense)
104+
105+
self._check_bool_result(a > b_dense)
106+
self._assert((a > b_dense).to_dense(), a_dense > b_dense)
107+
108+
self._check_bool_result(a < b_dense)
109+
self._assert((a < b_dense).to_dense(), a_dense < b_dense)
110+
111+
def test_float_scalar(self):
112+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
113+
114+
for kind in ['integer', 'block']:
115+
a = self._klass(values, kind=kind)
116+
self._check_numeric_ops(a, 1, values, 1)
117+
self._check_numeric_ops(a, 0, values, 0)
118+
self._check_numeric_ops(a, 3, values, 3)
119+
120+
a = self._klass(values, kind=kind, fill_value=0)
121+
self._check_numeric_ops(a, 1, values, 1)
122+
self._check_numeric_ops(a, 0, values, 0)
123+
self._check_numeric_ops(a, 3, values, 3)
124+
125+
a = self._klass(values, kind=kind, fill_value=2)
126+
self._check_numeric_ops(a, 1, values, 1)
127+
self._check_numeric_ops(a, 0, values, 0)
128+
self._check_numeric_ops(a, 3, values, 3)
129+
130+
def test_float_scalar_comparison(self):
131+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
132+
133+
for kind in ['integer', 'block']:
134+
a = self._klass(values, kind=kind)
135+
self._check_comparison_ops(a, 1, values, 1)
136+
self._check_comparison_ops(a, 0, values, 0)
137+
self._check_comparison_ops(a, 3, values, 3)
138+
139+
a = self._klass(values, kind=kind, fill_value=0)
140+
self._check_comparison_ops(a, 1, values, 1)
141+
self._check_comparison_ops(a, 0, values, 0)
142+
self._check_comparison_ops(a, 3, values, 3)
143+
144+
a = self._klass(values, kind=kind, fill_value=2)
145+
self._check_comparison_ops(a, 1, values, 1)
146+
self._check_comparison_ops(a, 0, values, 0)
147+
self._check_comparison_ops(a, 3, values, 3)
148+
149+
def test_float_same_index(self):
150+
# when sp_index are the same
151+
for kind in ['integer', 'block']:
152+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
153+
rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
154+
155+
a = self._klass(values, kind=kind)
156+
b = self._klass(rvalues, kind=kind)
157+
self._check_numeric_ops(a, b, values, rvalues)
158+
159+
values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
160+
rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
161+
162+
a = self._klass(values, kind=kind, fill_value=0)
163+
b = self._klass(rvalues, kind=kind, fill_value=0)
164+
self._check_numeric_ops(a, b, values, rvalues)
165+
166+
def test_float_same_index_comparison(self):
167+
# when sp_index are the same
168+
for kind in ['integer', 'block']:
169+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
170+
rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
171+
172+
a = self._klass(values, kind=kind)
173+
b = self._klass(rvalues, kind=kind)
174+
self._check_comparison_ops(a, b, values, rvalues)
175+
176+
values = self._base([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
177+
rvalues = self._base([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
178+
179+
a = self._klass(values, kind=kind, fill_value=0)
180+
b = self._klass(rvalues, kind=kind, fill_value=0)
181+
self._check_comparison_ops(a, b, values, rvalues)
182+
183+
def test_float_array(self):
184+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
185+
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
186+
187+
for kind in ['integer', 'block']:
188+
a = self._klass(values, kind=kind)
189+
b = self._klass(rvalues, kind=kind)
190+
self._check_numeric_ops(a, b, values, rvalues)
191+
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
192+
193+
a = self._klass(values, kind=kind, fill_value=0)
194+
b = self._klass(rvalues, kind=kind)
195+
self._check_numeric_ops(a, b, values, rvalues)
196+
197+
a = self._klass(values, kind=kind, fill_value=0)
198+
b = self._klass(rvalues, kind=kind, fill_value=0)
199+
self._check_numeric_ops(a, b, values, rvalues)
200+
201+
a = self._klass(values, kind=kind, fill_value=1)
202+
b = self._klass(rvalues, kind=kind, fill_value=2)
203+
self._check_numeric_ops(a, b, values, rvalues)
204+
205+
def test_float_array_different_kind(self):
206+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
207+
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
208+
209+
a = self._klass(values, kind='integer')
210+
b = self._klass(rvalues, kind='block')
211+
self._check_numeric_ops(a, b, values, rvalues)
212+
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
213+
214+
a = self._klass(values, kind='integer', fill_value=0)
215+
b = self._klass(rvalues, kind='block')
216+
self._check_numeric_ops(a, b, values, rvalues)
217+
218+
a = self._klass(values, kind='integer', fill_value=0)
219+
b = self._klass(rvalues, kind='block', fill_value=0)
220+
self._check_numeric_ops(a, b, values, rvalues)
221+
222+
a = self._klass(values, kind='integer', fill_value=1)
223+
b = self._klass(rvalues, kind='block', fill_value=2)
224+
self._check_numeric_ops(a, b, values, rvalues)
225+
226+
def test_float_array_comparison(self):
227+
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
228+
rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
229+
230+
for kind in ['integer', 'block']:
231+
a = self._klass(values, kind=kind)
232+
b = self._klass(rvalues, kind=kind)
233+
self._check_comparison_ops(a, b, values, rvalues)
234+
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
235+
236+
a = self._klass(values, kind=kind, fill_value=0)
237+
b = self._klass(rvalues, kind=kind)
238+
self._check_comparison_ops(a, b, values, rvalues)
239+
240+
a = self._klass(values, kind=kind, fill_value=0)
241+
b = self._klass(rvalues, kind=kind, fill_value=0)
242+
self._check_comparison_ops(a, b, values, rvalues)
243+
244+
a = self._klass(values, kind=kind, fill_value=1)
245+
b = self._klass(rvalues, kind=kind, fill_value=2)
246+
self._check_comparison_ops(a, b, values, rvalues)
247+
248+
def test_int_array(self):
249+
# have to specify dtype explicitly until fixing GH 667
250+
dtype = np.int64
251+
252+
values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype)
253+
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype)
254+
255+
for kind in ['integer', 'block']:
256+
a = self._klass(values, dtype=dtype, kind=kind)
257+
self.assertEqual(a.dtype, dtype)
258+
b = self._klass(rvalues, dtype=dtype, kind=kind)
259+
self.assertEqual(b.dtype, dtype)
260+
261+
self._check_numeric_ops(a, b, values, rvalues)
262+
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
263+
264+
a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
265+
self.assertEqual(a.dtype, dtype)
266+
b = self._klass(rvalues, dtype=dtype, kind=kind)
267+
self.assertEqual(b.dtype, dtype)
268+
269+
self._check_numeric_ops(a, b, values, rvalues)
270+
271+
a = self._klass(values, fill_value=0, dtype=dtype, kind=kind)
272+
self.assertEqual(a.dtype, dtype)
273+
b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind)
274+
self.assertEqual(b.dtype, dtype)
275+
self._check_numeric_ops(a, b, values, rvalues)
276+
277+
a = self._klass(values, fill_value=1, dtype=dtype, kind=kind)
278+
self.assertEqual(a.dtype, dtype)
279+
b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind)
280+
self.assertEqual(b.dtype, dtype)
281+
self._check_numeric_ops(a, b, values, rvalues)
282+
283+
def test_int_array_comparison(self):
284+
values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0])
285+
rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0])
286+
287+
dtype = np.int64
288+
289+
for kind in ['integer', 'block']:
290+
a = self._klass(values, dtype=dtype, kind=kind)
291+
b = self._klass(rvalues, dtype=dtype, kind=kind)
292+
self._check_comparison_ops(a, b, values, rvalues)
293+
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
294+
295+
a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
296+
b = self._klass(rvalues, dtype=dtype, kind=kind)
297+
self._check_comparison_ops(a, b, values, rvalues)
298+
299+
a = self._klass(values, dtype=dtype, kind=kind, fill_value=0)
300+
b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0)
301+
self._check_comparison_ops(a, b, values, rvalues)
302+
303+
a = self._klass(values, dtype=dtype, kind=kind, fill_value=1)
304+
b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2)
305+
self._check_comparison_ops(a, b, values, rvalues)
306+
307+
308+
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
309+
310+
_base = pd.Series
311+
_klass = pd.SparseSeries
312+
313+
def _assert(self, a, b):
314+
tm.assert_series_equal(a, b)
315+
316+
def _check_bool_result(self, res):
317+
# ToDo: Must return SparseSeries after GH 667
318+
tm.assertIsInstance(res, self._base)
319+
self.assertEqual(res.dtype, np.bool)
320+
321+
def test_alignment(self):
322+
da = pd.Series(np.arange(4))
323+
db = pd.Series(np.arange(4), index=[1, 2, 3, 4])
324+
325+
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
326+
sb = pd.SparseSeries(np.arange(4), index=[1, 2, 3, 4],
327+
dtype=np.int64, fill_value=0)
328+
self._check_numeric_ops(sa, sb, da, db)
329+
330+
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
331+
sb = pd.SparseSeries(np.arange(4), index=[1, 2, 3, 4],
332+
dtype=np.int64, fill_value=np.nan)
333+
self._check_numeric_ops(sa, sb, da, db)
334+
335+
da = pd.Series(np.arange(4))
336+
db = pd.Series(np.arange(4), index=[10, 11, 12, 13])
337+
338+
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=0)
339+
sb = pd.SparseSeries(np.arange(4), index=[10, 11, 12, 13],
340+
dtype=np.int64, fill_value=0)
341+
self._check_numeric_ops(sa, sb, da, db)
342+
343+
sa = pd.SparseSeries(np.arange(4), dtype=np.int64, fill_value=np.nan)
344+
sb = pd.SparseSeries(np.arange(4), index=[10, 11, 12, 13],
345+
dtype=np.int64, fill_value=np.nan)
346+
self._check_numeric_ops(sa, sb, da, db)

‎pandas/sparse/tests/test_array.py

Lines changed: 0 additions & 189 deletions
Original file line numberDiff line numberDiff line change
@@ -539,195 +539,6 @@ def test_fillna_overlap(self):
539539
tm.assert_sp_array_equal(res, exp)
540540

541541

542-
class TestSparseArrayArithmetic(tm.TestCase):
543-
544-
_multiprocess_can_split_ = True
545-
546-
def _check_numeric_ops(self, a, b, a_dense, b_dense):
547-
tm.assert_numpy_array_equal((a + b).to_dense(), a_dense + b_dense)
548-
tm.assert_numpy_array_equal((b + a).to_dense(), b_dense + a_dense)
549-
550-
tm.assert_numpy_array_equal((a - b).to_dense(), a_dense - b_dense)
551-
tm.assert_numpy_array_equal((b - a).to_dense(), b_dense - a_dense)
552-
553-
tm.assert_numpy_array_equal((a * b).to_dense(), a_dense * b_dense)
554-
tm.assert_numpy_array_equal((b * a).to_dense(), b_dense * a_dense)
555-
556-
tm.assert_numpy_array_equal((a / b).to_dense(), a_dense / b_dense)
557-
tm.assert_numpy_array_equal((b / a).to_dense(), b_dense / a_dense)
558-
559-
tm.assert_numpy_array_equal((a // b).to_dense(), a_dense // b_dense)
560-
tm.assert_numpy_array_equal((b // a).to_dense(), b_dense // a_dense)
561-
562-
tm.assert_numpy_array_equal((a % b).to_dense(), a_dense % b_dense)
563-
tm.assert_numpy_array_equal((b % a).to_dense(), b_dense % a_dense)
564-
565-
tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense)
566-
tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense)
567-
568-
def _check_comparison_ops(self, a, b, a_dense, b_dense):
569-
570-
def _check(res):
571-
tm.assertIsInstance(res, SparseArray)
572-
self.assertEqual(res.dtype, np.bool)
573-
self.assertIsInstance(res.fill_value, bool)
574-
575-
_check(a == b)
576-
tm.assert_numpy_array_equal((a == b).to_dense(), a_dense == b_dense)
577-
578-
_check(a != b)
579-
tm.assert_numpy_array_equal((a != b).to_dense(), a_dense != b_dense)
580-
581-
_check(a >= b)
582-
tm.assert_numpy_array_equal((a >= b).to_dense(), a_dense >= b_dense)
583-
584-
_check(a <= b)
585-
tm.assert_numpy_array_equal((a <= b).to_dense(), a_dense <= b_dense)
586-
587-
_check(a > b)
588-
tm.assert_numpy_array_equal((a > b).to_dense(), a_dense > b_dense)
589-
590-
_check(a < b)
591-
tm.assert_numpy_array_equal((a < b).to_dense(), a_dense < b_dense)
592-
593-
def test_float_scalar(self):
594-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
595-
596-
for kind in ['integer', 'block']:
597-
a = SparseArray(values, kind=kind)
598-
self._check_numeric_ops(a, 1, values, 1)
599-
self._check_numeric_ops(a, 0, values, 0)
600-
self._check_numeric_ops(a, 3, values, 3)
601-
602-
a = SparseArray(values, kind=kind, fill_value=0)
603-
self._check_numeric_ops(a, 1, values, 1)
604-
self._check_numeric_ops(a, 0, values, 0)
605-
self._check_numeric_ops(a, 3, values, 3)
606-
607-
a = SparseArray(values, kind=kind, fill_value=2)
608-
self._check_numeric_ops(a, 1, values, 1)
609-
self._check_numeric_ops(a, 0, values, 0)
610-
self._check_numeric_ops(a, 3, values, 3)
611-
612-
def test_float_scalar_comparison(self):
613-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
614-
615-
for kind in ['integer', 'block']:
616-
a = SparseArray(values, kind=kind)
617-
self._check_comparison_ops(a, 1, values, 1)
618-
self._check_comparison_ops(a, 0, values, 0)
619-
self._check_comparison_ops(a, 3, values, 3)
620-
621-
a = SparseArray(values, kind=kind, fill_value=0)
622-
self._check_comparison_ops(a, 1, values, 1)
623-
self._check_comparison_ops(a, 0, values, 0)
624-
self._check_comparison_ops(a, 3, values, 3)
625-
626-
a = SparseArray(values, kind=kind, fill_value=2)
627-
self._check_comparison_ops(a, 1, values, 1)
628-
self._check_comparison_ops(a, 0, values, 0)
629-
self._check_comparison_ops(a, 3, values, 3)
630-
631-
def test_float_same_index(self):
632-
# when sp_index are the same
633-
for kind in ['integer', 'block']:
634-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
635-
rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
636-
637-
a = SparseArray(values, kind=kind)
638-
b = SparseArray(rvalues, kind=kind)
639-
self._check_numeric_ops(a, b, values, rvalues)
640-
641-
values = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
642-
rvalues = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
643-
644-
a = SparseArray(values, kind=kind, fill_value=0)
645-
b = SparseArray(rvalues, kind=kind, fill_value=0)
646-
self._check_numeric_ops(a, b, values, rvalues)
647-
648-
def test_float_same_index_comparison(self):
649-
# when sp_index are the same
650-
for kind in ['integer', 'block']:
651-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
652-
rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
653-
654-
a = SparseArray(values, kind=kind)
655-
b = SparseArray(rvalues, kind=kind)
656-
self._check_comparison_ops(a, b, values, rvalues)
657-
658-
values = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
659-
rvalues = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
660-
661-
a = SparseArray(values, kind=kind, fill_value=0)
662-
b = SparseArray(rvalues, kind=kind, fill_value=0)
663-
self._check_comparison_ops(a, b, values, rvalues)
664-
665-
def test_float_array(self):
666-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
667-
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
668-
669-
for kind in ['integer', 'block']:
670-
a = SparseArray(values, kind=kind)
671-
b = SparseArray(rvalues, kind=kind)
672-
self._check_numeric_ops(a, b, values, rvalues)
673-
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
674-
675-
a = SparseArray(values, kind=kind, fill_value=0)
676-
b = SparseArray(rvalues, kind=kind)
677-
self._check_numeric_ops(a, b, values, rvalues)
678-
679-
a = SparseArray(values, kind=kind, fill_value=0)
680-
b = SparseArray(rvalues, kind=kind, fill_value=0)
681-
self._check_numeric_ops(a, b, values, rvalues)
682-
683-
a = SparseArray(values, kind=kind, fill_value=1)
684-
b = SparseArray(rvalues, kind=kind, fill_value=2)
685-
self._check_numeric_ops(a, b, values, rvalues)
686-
687-
def test_float_array_different_kind(self):
688-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
689-
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
690-
691-
a = SparseArray(values, kind='integer')
692-
b = SparseArray(rvalues, kind='block')
693-
self._check_numeric_ops(a, b, values, rvalues)
694-
self._check_numeric_ops(a, b * 0, values, rvalues * 0)
695-
696-
a = SparseArray(values, kind='integer', fill_value=0)
697-
b = SparseArray(rvalues, kind='block')
698-
self._check_numeric_ops(a, b, values, rvalues)
699-
700-
a = SparseArray(values, kind='integer', fill_value=0)
701-
b = SparseArray(rvalues, kind='block', fill_value=0)
702-
self._check_numeric_ops(a, b, values, rvalues)
703-
704-
a = SparseArray(values, kind='integer', fill_value=1)
705-
b = SparseArray(rvalues, kind='block', fill_value=2)
706-
self._check_numeric_ops(a, b, values, rvalues)
707-
708-
def test_float_array_comparison(self):
709-
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
710-
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
711-
712-
for kind in ['integer', 'block']:
713-
a = SparseArray(values, kind=kind)
714-
b = SparseArray(rvalues, kind=kind)
715-
self._check_comparison_ops(a, b, values, rvalues)
716-
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
717-
718-
a = SparseArray(values, kind=kind, fill_value=0)
719-
b = SparseArray(rvalues, kind=kind)
720-
self._check_comparison_ops(a, b, values, rvalues)
721-
722-
a = SparseArray(values, kind=kind, fill_value=0)
723-
b = SparseArray(rvalues, kind=kind, fill_value=0)
724-
self._check_comparison_ops(a, b, values, rvalues)
725-
726-
a = SparseArray(values, kind=kind, fill_value=1)
727-
b = SparseArray(rvalues, kind=kind, fill_value=2)
728-
self._check_comparison_ops(a, b, values, rvalues)
729-
730-
731542
class TestSparseArrayAnalytics(tm.TestCase):
732543
def test_sum(self):
733544
data = np.arange(10).astype(float)

‎pandas/sparse/tests/test_indexing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def test_loc_index(self):
134134

135135
# sparse array (actually it coerces to normal Series)
136136
result = sparse.loc[sparse % 2 == 1]
137+
print((sparse % 2 == 1).values)
137138
exp = orig.loc[orig % 2 == 1].to_sparse()
138139
tm.assert_sp_series_equal(result, exp)
139140

‎pandas/sparse/tests/test_libsparse.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -486,13 +486,14 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
486486
xfill = 0
487487
yfill = 2
488488

489-
result_block_vals, rb_index = sparse_op(x, xindex, xfill, y,
490-
yindex, yfill)
491-
result_int_vals, ri_index = sparse_op(x, xdindex, xfill, y,
492-
ydindex, yfill)
489+
result_block_vals, rb_index, bfill = sparse_op(x, xindex, xfill, y,
490+
yindex, yfill)
491+
result_int_vals, ri_index, ifill = sparse_op(x, xdindex, xfill, y,
492+
ydindex, yfill)
493493

494494
self.assertTrue(rb_index.to_int_index().equals(ri_index))
495495
tm.assert_numpy_array_equal(result_block_vals, result_int_vals)
496+
self.assertEqual(bfill, ifill)
496497

497498
# check versus Series...
498499
xseries = Series(x, xdindex.indices)
@@ -517,7 +518,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
517518

518519
def make_optestf(op):
519520
def f(self):
520-
sparse_op = getattr(splib, 'sparse_%s' % op)
521+
sparse_op = getattr(splib, 'sparse_%s_float64' % op)
521522
python_op = getattr(operator, op)
522523
self._op_tests(sparse_op, python_op)
523524

‎pandas/sparse/tests/test_series.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,7 @@ def test_setslice(self):
512512
name=self.bseries.name))
513513

514514
def test_operators(self):
515+
515516
def _check_op(a, b, op):
516517
sp_result = op(a, b)
517518
adense = a.to_dense() if isinstance(a, SparseSeries) else a
@@ -781,7 +782,7 @@ def test_fill_value_corner(self):
781782
cop2 = self.zbseries.copy()
782783
cop2.fill_value = 1
783784
result = cop2 / cop
784-
self.assertTrue(np.isnan(result.fill_value))
785+
self.assertEqual(result.fill_value, np.inf)
785786

786787
def test_fill_value_when_combine_const(self):
787788
# GH12723
@@ -1239,6 +1240,7 @@ def _dense_series_compare(s, f):
12391240

12401241

12411242
class TestSparseSeriesAnalytics(tm.TestCase):
1243+
12421244
def setUp(self):
12431245
arr, index = _test_data1()
12441246
self.bseries = SparseSeries(arr, index=index, kind='block',

‎pandas/src/sparse.pyx

Lines changed: 3 additions & 339 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from numpy cimport ndarray, uint8_t, int32_t, float64_t
1+
from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t,
2+
float64_t, float32_t, float16_t)
23
cimport numpy as np
34

45
cimport cython
@@ -754,346 +755,9 @@ cdef class BlockUnion(BlockMerge):
754755
#-------------------------------------------------------------------------------
755756
# Sparse arithmetic
756757

757-
ctypedef float64_t (* double_func)(float64_t a, float64_t b)
758+
include "sparse_op_helper.pxi"
758759

759760

760-
cdef inline tuple sparse_combine(ndarray x, SparseIndex xindex, float64_t xfill,
761-
ndarray y, SparseIndex yindex, float64_t yfill,
762-
double_func op):
763-
if isinstance(xindex, BlockIndex):
764-
return block_op(x, xindex.to_block_index(), xfill,
765-
y, yindex.to_block_index(), yfill, op)
766-
elif isinstance(xindex, IntIndex):
767-
return int_op(x, xindex.to_int_index(), xfill,
768-
y, yindex.to_int_index(), yfill, op)
769-
770-
771-
@cython.boundscheck(False)
772-
cdef inline tuple block_op(ndarray x_, BlockIndex xindex, float64_t xfill,
773-
ndarray y_, BlockIndex yindex, float64_t yfill,
774-
double_func op):
775-
"""
776-
Binary operator on BlockIndex objects with fill values
777-
"""
778-
779-
cdef:
780-
BlockIndex out_index
781-
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
782-
Py_ssize_t xbp = 0, ybp = 0 # block positions
783-
int32_t xloc, yloc
784-
Py_ssize_t xblock = 0, yblock = 0 # block numbers
785-
786-
ndarray[float64_t, ndim=1] x, y
787-
ndarray[float64_t, ndim=1] out
788-
789-
# to suppress Cython warning
790-
x = x_
791-
y = y_
792-
793-
out_index = xindex.make_union(yindex)
794-
out = np.empty(out_index.npoints, dtype=np.float64)
795-
796-
# Wow, what a hack job. Need to do something about this
797-
798-
# walk the two SparseVectors, adding matched locations...
799-
for out_i from 0 <= out_i < out_index.npoints:
800-
if yblock == yindex.nblocks:
801-
# use y fill value
802-
out[out_i] = op(x[xi], yfill)
803-
xi += 1
804-
805-
# advance x location
806-
xbp += 1
807-
if xbp == xindex.lenbuf[xblock]:
808-
xblock += 1
809-
xbp = 0
810-
continue
811-
812-
if xblock == xindex.nblocks:
813-
# use x fill value
814-
out[out_i] = op(xfill, y[yi])
815-
yi += 1
816-
817-
# advance y location
818-
ybp += 1
819-
if ybp == yindex.lenbuf[yblock]:
820-
yblock += 1
821-
ybp = 0
822-
continue
823-
824-
yloc = yindex.locbuf[yblock] + ybp
825-
xloc = xindex.locbuf[xblock] + xbp
826-
827-
# each index in the out_index had to come from either x, y, or both
828-
if xloc == yloc:
829-
out[out_i] = op(x[xi], y[yi])
830-
xi += 1
831-
yi += 1
832-
833-
# advance both locations
834-
xbp += 1
835-
if xbp == xindex.lenbuf[xblock]:
836-
xblock += 1
837-
xbp = 0
838-
839-
ybp += 1
840-
if ybp == yindex.lenbuf[yblock]:
841-
yblock += 1
842-
ybp = 0
843-
844-
elif xloc < yloc:
845-
# use y fill value
846-
out[out_i] = op(x[xi], yfill)
847-
xi += 1
848-
849-
# advance x location
850-
xbp += 1
851-
if xbp == xindex.lenbuf[xblock]:
852-
xblock += 1
853-
xbp = 0
854-
else:
855-
# use x fill value
856-
out[out_i] = op(xfill, y[yi])
857-
yi += 1
858-
859-
# advance y location
860-
ybp += 1
861-
if ybp == yindex.lenbuf[yblock]:
862-
yblock += 1
863-
ybp = 0
864-
865-
return out, out_index
866-
867-
868-
@cython.boundscheck(False)
869-
cdef inline tuple int_op(ndarray x_, IntIndex xindex, float64_t xfill,
870-
ndarray y_, IntIndex yindex, float64_t yfill,
871-
double_func op):
872-
cdef:
873-
IntIndex out_index
874-
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
875-
int32_t xloc, yloc
876-
ndarray[int32_t, ndim=1] xindices, yindices, out_indices
877-
ndarray[float64_t, ndim=1] x, y
878-
ndarray[float64_t, ndim=1] out
879-
880-
# suppress Cython compiler warnings due to inlining
881-
x = x_
882-
y = y_
883-
884-
# need to do this first to know size of result array
885-
out_index = xindex.make_union(yindex)
886-
out = np.empty(out_index.npoints, dtype=np.float64)
887-
888-
xindices = xindex.indices
889-
yindices = yindex.indices
890-
out_indices = out_index.indices
891-
892-
# walk the two SparseVectors, adding matched locations...
893-
for out_i from 0 <= out_i < out_index.npoints:
894-
if xi == xindex.npoints:
895-
# use x fill value
896-
out[out_i] = op(xfill, y[yi])
897-
yi += 1
898-
continue
899-
900-
if yi == yindex.npoints:
901-
# use y fill value
902-
out[out_i] = op(x[xi], yfill)
903-
xi += 1
904-
continue
905-
906-
xloc = xindices[xi]
907-
yloc = yindices[yi]
908-
909-
# each index in the out_index had to come from either x, y, or both
910-
if xloc == yloc:
911-
out[out_i] = op(x[xi], y[yi])
912-
xi += 1
913-
yi += 1
914-
elif xloc < yloc:
915-
# use y fill value
916-
out[out_i] = op(x[xi], yfill)
917-
xi += 1
918-
else:
919-
# use x fill value
920-
out[out_i] = op(xfill, y[yi])
921-
yi += 1
922-
923-
return out, out_index
924-
925-
cdef inline float64_t __add(float64_t a, float64_t b):
926-
return a + b
927-
928-
cdef inline float64_t __sub(float64_t a, float64_t b):
929-
return a - b
930-
931-
cdef inline float64_t __rsub(float64_t a, float64_t b):
932-
return b - a
933-
934-
cdef inline float64_t __div(float64_t a, float64_t b):
935-
if b == 0:
936-
if a > 0:
937-
return INF
938-
elif a < 0:
939-
return -INF
940-
else:
941-
return NaN
942-
else:
943-
return a / b
944-
945-
cdef inline float64_t __rdiv(float64_t a, float64_t b):
946-
return __div(b, a)
947-
948-
cdef inline float64_t __floordiv(float64_t a, float64_t b):
949-
if b == 0:
950-
# numpy >= 1.11 returns NaN
951-
# for a // 0, rather than +-inf
952-
if _np_version_under1p11:
953-
if a > 0:
954-
return INF
955-
elif a < 0:
956-
return -INF
957-
return NaN
958-
else:
959-
return a // b
960-
961-
cdef inline float64_t __rfloordiv(float64_t a, float64_t b):
962-
return __floordiv(b, a)
963-
964-
cdef inline float64_t __mul(float64_t a, float64_t b):
965-
return a * b
966-
967-
cdef inline float64_t __eq(float64_t a, float64_t b):
968-
return a == b
969-
970-
cdef inline float64_t __ne(float64_t a, float64_t b):
971-
return a != b
972-
973-
cdef inline float64_t __lt(float64_t a, float64_t b):
974-
return a < b
975-
976-
cdef inline float64_t __gt(float64_t a, float64_t b):
977-
return a > b
978-
979-
cdef inline float64_t __le(float64_t a, float64_t b):
980-
return a <= b
981-
982-
cdef inline float64_t __ge(float64_t a, float64_t b):
983-
return a >= b
984-
985-
cdef inline float64_t __mod(float64_t a, float64_t b):
986-
if b == 0:
987-
return NaN
988-
else:
989-
return a % b
990-
991-
cdef inline float64_t __rmod(float64_t a, float64_t b):
992-
return __mod(b, a)
993-
994-
cdef inline float64_t __pow(float64_t a, float64_t b):
995-
return a ** b
996-
997-
cdef inline float64_t __rpow(float64_t a, float64_t b):
998-
return __pow(b, a)
999-
1000-
1001-
# This probably needs to be "templated" to achieve maximum performance.
1002-
# TODO: quantify performance boost to "templating"
1003-
1004-
cpdef sparse_add(ndarray x, SparseIndex xindex, float64_t xfill,
1005-
ndarray y, SparseIndex yindex, float64_t yfill):
1006-
return sparse_combine(x, xindex, xfill,
1007-
y, yindex, yfill, __add)
1008-
1009-
cpdef sparse_sub(ndarray x, SparseIndex xindex, float64_t xfill,
1010-
ndarray y, SparseIndex yindex, float64_t yfill):
1011-
return sparse_combine(x, xindex, xfill,
1012-
y, yindex, yfill, __sub)
1013-
1014-
cpdef sparse_rsub(ndarray x, SparseIndex xindex, float64_t xfill,
1015-
ndarray y, SparseIndex yindex, float64_t yfill):
1016-
return sparse_combine(x, xindex, xfill,
1017-
y, yindex, yfill, __rsub)
1018-
1019-
cpdef sparse_mul(ndarray x, SparseIndex xindex, float64_t xfill,
1020-
ndarray y, SparseIndex yindex, float64_t yfill):
1021-
return sparse_combine(x, xindex, xfill,
1022-
y, yindex, yfill, __mul)
1023-
1024-
cpdef sparse_div(ndarray x, SparseIndex xindex, float64_t xfill,
1025-
ndarray y, SparseIndex yindex, float64_t yfill):
1026-
return sparse_combine(x, xindex, xfill,
1027-
y, yindex, yfill, __div)
1028-
1029-
cpdef sparse_rdiv(ndarray x, SparseIndex xindex, float64_t xfill,
1030-
ndarray y, SparseIndex yindex, float64_t yfill):
1031-
return sparse_combine(x, xindex, xfill,
1032-
y, yindex, yfill, __rdiv)
1033-
1034-
sparse_truediv = sparse_div
1035-
sparse_rtruediv = sparse_rdiv
1036-
1037-
cpdef sparse_floordiv(ndarray x, SparseIndex xindex, float64_t xfill,
1038-
ndarray y, SparseIndex yindex, float64_t yfill):
1039-
return sparse_combine(x, xindex, xfill,
1040-
y, yindex, yfill, __floordiv)
1041-
1042-
cpdef sparse_rfloordiv(ndarray x, SparseIndex xindex, float64_t xfill,
1043-
ndarray y, SparseIndex yindex, float64_t yfill):
1044-
return sparse_combine(x, xindex, xfill,
1045-
y, yindex, yfill, __rfloordiv)
1046-
1047-
cpdef sparse_mod(ndarray x, SparseIndex xindex, float64_t xfill,
1048-
ndarray y, SparseIndex yindex, float64_t yfill):
1049-
return sparse_combine(x, xindex, xfill,
1050-
y, yindex, yfill, __mod)
1051-
1052-
cpdef sparse_rmod(ndarray x, SparseIndex xindex, float64_t xfill,
1053-
ndarray y, SparseIndex yindex, float64_t yfill):
1054-
return sparse_combine(x, xindex, xfill,
1055-
y, yindex, yfill, __rmod)
1056-
1057-
cpdef sparse_pow(ndarray x, SparseIndex xindex, float64_t xfill,
1058-
ndarray y, SparseIndex yindex, float64_t yfill):
1059-
return sparse_combine(x, xindex, xfill,
1060-
y, yindex, yfill, __pow)
1061-
1062-
cpdef sparse_rpow(ndarray x, SparseIndex xindex, float64_t xfill,
1063-
ndarray y, SparseIndex yindex, float64_t yfill):
1064-
return sparse_combine(x, xindex, xfill,
1065-
y, yindex, yfill, __rpow)
1066-
1067-
cpdef sparse_eq(ndarray x, SparseIndex xindex, float64_t xfill,
1068-
ndarray y, SparseIndex yindex, float64_t yfill):
1069-
return sparse_combine(x, xindex, xfill,
1070-
y, yindex, yfill, __eq)
1071-
1072-
cpdef sparse_ne(ndarray x, SparseIndex xindex, float64_t xfill,
1073-
ndarray y, SparseIndex yindex, float64_t yfill):
1074-
return sparse_combine(x, xindex, xfill,
1075-
y, yindex, yfill, __ne)
1076-
1077-
cpdef sparse_lt(ndarray x, SparseIndex xindex, float64_t xfill,
1078-
ndarray y, SparseIndex yindex, float64_t yfill):
1079-
return sparse_combine(x, xindex, xfill,
1080-
y, yindex, yfill, __lt)
1081-
1082-
cpdef sparse_gt(ndarray x, SparseIndex xindex, float64_t xfill,
1083-
ndarray y, SparseIndex yindex, float64_t yfill):
1084-
return sparse_combine(x, xindex, xfill,
1085-
y, yindex, yfill, __gt)
1086-
1087-
cpdef sparse_le(ndarray x, SparseIndex xindex, float64_t xfill,
1088-
ndarray y, SparseIndex yindex, float64_t yfill):
1089-
return sparse_combine(x, xindex, xfill,
1090-
y, yindex, yfill, __le)
1091-
1092-
cpdef sparse_ge(ndarray x, SparseIndex xindex, float64_t xfill,
1093-
ndarray y, SparseIndex yindex, float64_t yfill):
1094-
return sparse_combine(x, xindex, xfill,
1095-
y, yindex, yfill, __ge)
1096-
1097761
#-------------------------------------------------------------------------------
1098762
# Indexing operations
1099763

‎pandas/src/sparse_op_helper.pxi

Lines changed: 5532 additions & 0 deletions
Large diffs are not rendered by default.

‎pandas/src/sparse_op_helper.pxi.in

Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
"""
2+
Template for each `dtype` helper function for sparse ops
3+
4+
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
5+
"""
6+
7+
#----------------------------------------------------------------------
8+
# Sparse op
9+
#----------------------------------------------------------------------
10+
11+
{{py:
12+
13+
# dtype, float_group
14+
dtypes = [('float64', True), ('int64', False)]
15+
16+
}}
17+
18+
{{for dtype, float_group in dtypes}}
19+
20+
{{if float_group}}
21+
22+
cdef inline {{dtype}}_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
23+
if b == 0:
24+
if a > 0:
25+
return INF
26+
elif a < 0:
27+
return -INF
28+
else:
29+
return NaN
30+
else:
31+
return float(a) / b
32+
33+
cdef inline {{dtype}}_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
34+
return __div_{{dtype}}(a, b)
35+
36+
cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
37+
if b == 0:
38+
# numpy >= 1.11 returns NaN
39+
# for a // 0, rather than +-inf
40+
if _np_version_under1p11:
41+
if a > 0:
42+
return INF
43+
elif a < 0:
44+
return -INF
45+
return NaN
46+
else:
47+
return a // b
48+
49+
cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
50+
if b == 0:
51+
return NaN
52+
else:
53+
return a % b
54+
55+
{{else}}
56+
57+
cdef inline float64_t __div_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
58+
if b == 0:
59+
if a > 0:
60+
return INF
61+
elif a < 0:
62+
return -INF
63+
else:
64+
return NaN
65+
else:
66+
return float(a) / b
67+
68+
cdef inline float64_t __truediv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
69+
return __div_{{dtype}}(a, b)
70+
71+
cdef inline {{dtype}}_t __floordiv_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
72+
if b == 0:
73+
return 0
74+
else:
75+
return a // b
76+
77+
cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b):
78+
if b == 0:
79+
return 0
80+
else:
81+
return a % b
82+
83+
{{endif}}
84+
85+
{{endfor}}
86+
87+
#----------------------------------------------------------------------
88+
# sparse array op
89+
#----------------------------------------------------------------------
90+
91+
{{py:
92+
93+
# dtype
94+
dtypes = ['float64', 'int64']
95+
96+
def get_op(tup):
97+
assert isinstance(tup, tuple)
98+
assert len(tup) == 4
99+
100+
opname, lval, rval, dtype = tup
101+
102+
ops_dict = {'add': '{0} + {1}',
103+
'sub': '{0} - {1}',
104+
'mul': '{0} * {1}',
105+
'div': '__div_{2}({0}, {1})',
106+
'mod': '__mod_{2}({0}, {1})',
107+
'truediv': '__truediv_{2}({0}, {1})',
108+
'floordiv': '__floordiv_{2}({0}, {1})',
109+
'pow': '{0} ** {1}',
110+
'eq': '{0} == {1}',
111+
'ne': '{0} != {1}',
112+
'lt': '{0} < {1}',
113+
'gt': '{0} > {1}',
114+
'le': '{0} <= {1}',
115+
'ge': '{0} >= {1}'}
116+
117+
return ops_dict[opname].format(lval, rval, dtype)
118+
119+
120+
def get_dispatch(dtypes):
121+
122+
ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv',
123+
'floordiv', 'pow', 'eq', 'ne', 'lt', 'gt', 'le', 'ge']
124+
125+
for opname in ops_list:
126+
for dtype in dtypes:
127+
128+
if opname in ('div', 'truediv'):
129+
rdtype = 'float64'
130+
elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
131+
rdtype = 'uint8'
132+
else:
133+
rdtype = dtype
134+
135+
yield opname, dtype, rdtype
136+
137+
}}
138+
139+
140+
{{for opname, dtype, rdtype in get_dispatch(dtypes)}}
141+
142+
143+
@cython.wraparound(False)
144+
@cython.boundscheck(False)
145+
cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_,
146+
BlockIndex xindex,
147+
{{dtype}}_t xfill,
148+
ndarray y_,
149+
BlockIndex yindex,
150+
{{dtype}}_t yfill):
151+
'''
152+
Binary operator on BlockIndex objects with fill values
153+
'''
154+
155+
cdef:
156+
BlockIndex out_index
157+
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
158+
Py_ssize_t xbp = 0, ybp = 0 # block positions
159+
int32_t xloc, yloc
160+
Py_ssize_t xblock = 0, yblock = 0 # block numbers
161+
162+
ndarray[{{dtype}}_t, ndim=1] x, y
163+
ndarray[{{rdtype}}_t, ndim=1] out
164+
165+
# to suppress Cython warning
166+
x = x_
167+
y = y_
168+
169+
out_index = xindex.make_union(yindex)
170+
out = np.empty(out_index.npoints, dtype=np.{{rdtype}})
171+
172+
# Wow, what a hack job. Need to do something about this
173+
174+
# walk the two SparseVectors, adding matched locations...
175+
for out_i from 0 <= out_i < out_index.npoints:
176+
if yblock == yindex.nblocks:
177+
# use y fill value
178+
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
179+
xi += 1
180+
181+
# advance x location
182+
xbp += 1
183+
if xbp == xindex.lenbuf[xblock]:
184+
xblock += 1
185+
xbp = 0
186+
continue
187+
188+
if xblock == xindex.nblocks:
189+
# use x fill value
190+
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
191+
yi += 1
192+
193+
# advance y location
194+
ybp += 1
195+
if ybp == yindex.lenbuf[yblock]:
196+
yblock += 1
197+
ybp = 0
198+
continue
199+
200+
yloc = yindex.locbuf[yblock] + ybp
201+
xloc = xindex.locbuf[xblock] + xbp
202+
203+
# each index in the out_index had to come from either x, y, or both
204+
if xloc == yloc:
205+
out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}}
206+
xi += 1
207+
yi += 1
208+
209+
# advance both locations
210+
xbp += 1
211+
if xbp == xindex.lenbuf[xblock]:
212+
xblock += 1
213+
xbp = 0
214+
215+
ybp += 1
216+
if ybp == yindex.lenbuf[yblock]:
217+
yblock += 1
218+
ybp = 0
219+
220+
elif xloc < yloc:
221+
# use y fill value
222+
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
223+
xi += 1
224+
225+
# advance x location
226+
xbp += 1
227+
if xbp == xindex.lenbuf[xblock]:
228+
xblock += 1
229+
xbp = 0
230+
else:
231+
# use x fill value
232+
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
233+
yi += 1
234+
235+
# advance y location
236+
ybp += 1
237+
if ybp == yindex.lenbuf[yblock]:
238+
yblock += 1
239+
ybp = 0
240+
241+
return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}}
242+
243+
244+
@cython.wraparound(False)
245+
@cython.boundscheck(False)
246+
cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex,
247+
{{dtype}}_t xfill,
248+
ndarray y_, IntIndex yindex,
249+
{{dtype}}_t yfill):
250+
cdef:
251+
IntIndex out_index
252+
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
253+
int32_t xloc, yloc
254+
ndarray[int32_t, ndim=1] xindices, yindices, out_indices
255+
ndarray[{{dtype}}_t, ndim=1] x, y
256+
ndarray[{{rdtype}}_t, ndim=1] out
257+
258+
# suppress Cython compiler warnings due to inlining
259+
x = x_
260+
y = y_
261+
262+
# need to do this first to know size of result array
263+
out_index = xindex.make_union(yindex)
264+
out = np.empty(out_index.npoints, dtype=np.{{rdtype}})
265+
266+
xindices = xindex.indices
267+
yindices = yindex.indices
268+
out_indices = out_index.indices
269+
270+
# walk the two SparseVectors, adding matched locations...
271+
for out_i from 0 <= out_i < out_index.npoints:
272+
if xi == xindex.npoints:
273+
# use x fill value
274+
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
275+
yi += 1
276+
continue
277+
278+
if yi == yindex.npoints:
279+
# use y fill value
280+
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
281+
xi += 1
282+
continue
283+
284+
xloc = xindices[xi]
285+
yloc = yindices[yi]
286+
287+
# each index in the out_index had to come from either x, y, or both
288+
if xloc == yloc:
289+
out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}}
290+
xi += 1
291+
yi += 1
292+
elif xloc < yloc:
293+
# use y fill value
294+
out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}}
295+
xi += 1
296+
else:
297+
# use x fill value
298+
out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}}
299+
yi += 1
300+
301+
return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}}
302+
303+
304+
cpdef sparse_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x,
305+
SparseIndex xindex, {{dtype}}_t xfill,
306+
ndarray[{{dtype}}_t, ndim=1] y,
307+
SparseIndex yindex, {{dtype}}_t yfill):
308+
309+
if isinstance(xindex, BlockIndex):
310+
return block_op_{{opname}}_{{dtype}}(x, xindex.to_block_index(), xfill,
311+
y, yindex.to_block_index(), yfill)
312+
elif isinstance(xindex, IntIndex):
313+
return int_op_{{opname}}_{{dtype}}(x, xindex.to_int_index(), xfill,
314+
y, yindex.to_int_index(), yfill)
315+
else:
316+
raise NotImplementedError
317+
318+
319+
cpdef sparse_align_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x,
320+
ndarray[{{dtype}}_t, ndim=1] y):
321+
""" to return NumPy compat result """
322+
cdef:
323+
Py_ssize_t i = 0
324+
ndarray[{{rdtype}}_t, ndim=1] out
325+
326+
out = np.empty(len(x), dtype=np.{{rdtype}})
327+
328+
for i in range(len(x)):
329+
out[i] = {{(opname, 'x[i]', 'y[i]', dtype) | get_op}}
330+
return out
331+
332+
333+
cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill,
334+
{{dtype}}_t yfill):
335+
return {{(opname, 'xfill', 'yfill', dtype) | get_op}}
336+
337+
{{endfor}}

‎pandas/tests/series/test_subclass.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,13 @@ def test_subclass_sparse_slice(self):
5454
def test_subclass_sparse_addition(self):
5555
s1 = tm.SubclassedSparseSeries([1, 3, 5])
5656
s2 = tm.SubclassedSparseSeries([-2, 5, 12])
57-
tm.assert_sp_series_equal(s1 + s2,
58-
tm.SubclassedSparseSeries([-1.0, 8.0, 17.0]))
57+
exp = tm.SubclassedSparseSeries([-1, 8, 17])
58+
tm.assert_sp_series_equal(s1 + s2, exp)
59+
60+
s1 = tm.SubclassedSparseSeries([4.0, 5.0, 6.0])
61+
s2 = tm.SubclassedSparseSeries([1.0, 2.0, 3.0])
62+
exp = tm.SubclassedSparseSeries([5., 7., 9.])
63+
tm.assert_sp_series_equal(s1 + s2, exp)
5964

6065
def test_subclass_sparse_to_frame(self):
6166
s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx')

‎setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ def is_platform_mac():
108108
_pxipath = pjoin('pandas', 'src')
109109
_pxifiles = ['algos_common_helper.pxi.in', 'algos_groupby_helper.pxi.in',
110110
'algos_join_helper.pxi.in', 'algos_take_helper.pxi.in',
111-
'hashtable_class_helper.pxi.in', 'hashtable_func_helper.pxi.in']
111+
'hashtable_class_helper.pxi.in', 'hashtable_func_helper.pxi.in',
112+
'sparse_op_helper.pxi.in']
112113

113114

114115
class build_ext(_build_ext):

0 commit comments

Comments
 (0)
Please sign in to comment.