Skip to content

Remove auto densification and unify operator code. #46

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 27, 2017
122 changes: 73 additions & 49 deletions sparse/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,9 @@ def reshape(self, shape):
# TODO: this np.prod(self.shape) enforces a 2**64 limit to array size
linear_loc = self.linear_loc()

coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max(shape)))
max_shape = max(shape) if len(shape) != 0 else 1

coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max_shape - 1))
strides = 1
for i, d in enumerate(shape[::-1]):
coords[-(i + 1), :] = (linear_loc // strides) % d
Expand Down Expand Up @@ -580,31 +582,22 @@ def sum_duplicates(self):
return self

def __add__(self, other):
if isinstance(other, numbers.Number) and other == 0:
return self
if not isinstance(other, COO):
return self.maybe_densify() + other
else:
return self.elemwise_binary(operator.add, other)
return self.elemwise(operator.add, other)

def __radd__(self, other):
return self + other
__radd__ = __add__

def __neg__(self):
    """Return a new COO whose stored values are negated.

    ``coords``, ``shape``, ``has_duplicates`` and ``sorted`` are passed
    through from ``self`` unchanged; only ``data`` is negated.
    """
    negated = -self.data
    return COO(self.coords, negated, self.shape,
               self.has_duplicates, self.sorted)

def __sub__(self, other):
return self + (-other)
return self.elemwise(operator.sub, other)

def __rsub__(self, other):
return -self + other
return -(self - other)

def __mul__(self, other):
if isinstance(other, COO):
return self.elemwise_binary(operator.mul, other)
else:
return self.elemwise(operator.mul, other)
return self.elemwise(operator.mul, other)

__rmul__ = __mul__

Expand All @@ -620,32 +613,86 @@ def __pow__(self, other):
return self.elemwise(operator.pow, other)

def __and__(self, other):
return self.elemwise_binary(operator.and_, other)
return self.elemwise(operator.and_, other)

def __xor__(self, other):
return self.elemwise_binary(operator.xor, other)
return self.elemwise(operator.xor, other)

def __or__(self, other):
return self.elemwise_binary(operator.or_, other)
return self.elemwise(operator.or_, other)

def __gt__(self, other):
    """Elementwise ``self > other``, delegated to :meth:`elemwise`."""
    compare = operator.gt
    return self.elemwise(compare, other)

def __ge__(self, other):
    """Elementwise ``self >= other``, delegated to :meth:`elemwise`."""
    compare = operator.ge
    return self.elemwise(compare, other)

def __lt__(self, other):
    """Elementwise ``self < other``, delegated to :meth:`elemwise`."""
    compare = operator.lt
    return self.elemwise(compare, other)

def __le__(self, other):
    """Elementwise ``self <= other``, delegated to :meth:`elemwise`."""
    compare = operator.le
    return self.elemwise(compare, other)

def __eq__(self, other):
    """Elementwise ``self == other``, delegated to :meth:`elemwise`.

    The value returned is whatever :meth:`elemwise` produces, not a plain
    ``bool``.
    """
    # NOTE(review): in Python 3, defining __eq__ without also defining
    # __hash__ makes instances unhashable -- confirm the class handles
    # __hash__ as intended.
    compare = operator.eq
    return self.elemwise(compare, other)

def __ne__(self, other):
    """Elementwise ``self != other``, delegated to :meth:`elemwise`."""
    compare = operator.ne
    return self.elemwise(compare, other)

def elemwise(self, func, *args, **kwargs):
"""
Apply a function to one or two arguments.

Parameters
----------
func
The function to apply to one or two arguments.
args : tuple, optional
The extra arguments to pass to the function. If args[0] is a COO object
or a scipy.sparse.spmatrix, the function will be treated as a binary
function. Otherwise, it will be treated as a unary function.
kwargs : dict, optional
The kwargs to pass to the function.

Returns
-------
COO
The result of applying the function.
"""
if len(args) == 0:
return self._elemwise_unary(func, *args, **kwargs)
else:
other = args[0]
if isinstance(other, COO):
return self._elemwise_binary(func, *args, **kwargs)
elif isinstance(other, scipy.sparse.spmatrix):
other = COO.from_scipy_sparse(other)
return self._elemwise_binary(func, other, *args[1:], **kwargs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch could use a test

else:
return self._elemwise_unary(func, *args, **kwargs)

def _elemwise_unary(self, func, *args, **kwargs):
check = kwargs.pop('check', True)
data_zero = _zero_of_dtype(self.dtype)
func_zero = _zero_of_dtype(func(data_zero, *args, **kwargs).dtype)
if check and func(data_zero, *args, **kwargs) != func_zero:
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
return COO(self.coords, func(self.data, *args, **kwargs),

data_func = func(self.data, *args, **kwargs)
nonzero = data_func != func_zero

return COO(self.coords[:, nonzero], data_func[nonzero],
shape=self.shape,
has_duplicates=self.has_duplicates,
sorted=self.sorted)

def elemwise_binary(self, func, other, *args, **kwargs):
def _elemwise_binary(self, func, other, *args, **kwargs):
assert isinstance(other, COO)
check = kwargs.pop('check', True)
self_zero = _zero_of_dtype(self.dtype)
other_zero = _zero_of_dtype(other.dtype)
check = kwargs.pop('check', True)
func_zero = _zero_of_dtype(func(self_zero, other_zero, * args, **kwargs).dtype)
func_zero = _zero_of_dtype(func(self_zero, other_zero, *args, **kwargs).dtype)
if check and func(self_zero, other_zero, *args, **kwargs) != func_zero:
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
Expand Down Expand Up @@ -690,12 +737,6 @@ def elemwise_binary(self, func, other, *args, **kwargs):
matched_self, matched_other = _match_arrays(self_reduced_linear,
other_reduced_linear)

# Locate coordinates without a match
unmatched_self = np.ones(self.nnz, dtype=np.bool)
unmatched_self[matched_self] = False
unmatched_other = np.ones(other.nnz, dtype=np.bool)
unmatched_other[matched_other] = False

# Start with an empty list. This may reduce computation in many cases.
data_list = []
coords_list = []
Expand All @@ -711,11 +752,10 @@ def elemwise_binary(self, func, other, *args, **kwargs):
coords_list.append(matched_coords)

self_func = func(self_data, other_zero, *args, **kwargs)

# Add unmatched parts as necessary.
if (self_func != func_zero).any():
self_unmatched_coords, self_unmatched_func = \
self._get_unmatched_coords_data(self_coords, self_data, self_shape,
self._get_unmatched_coords_data(self_coords, self_func, self_shape,
result_shape, matched_self,
matched_coords)

Expand All @@ -726,7 +766,7 @@ def elemwise_binary(self, func, other, *args, **kwargs):

if (other_func != func_zero).any():
other_unmatched_coords, other_unmatched_func = \
self._get_unmatched_coords_data(other_coords, other_data, other_shape,
self._get_unmatched_coords_data(other_coords, other_func, other_shape,
result_shape, matched_other,
matched_coords)

Expand Down Expand Up @@ -1067,7 +1107,7 @@ def __abs__(self):

def exp(self, out=None):
assert out is None
return np.exp(self.maybe_densify())
return self.elemwise(np.exp)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this always err (given the current code)?

(not a concern, just asking a question)

Copy link
Collaborator Author

@hameerabbasi hameerabbasi Dec 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. But given what we talked about in #10 it's probably best to keep it.

Edit: If you really need exp you can do np.exp(x.maybe_densify()) or np.exp(x.todense()).


def expm1(self, out=None):
assert out is None
Expand Down Expand Up @@ -1123,23 +1163,7 @@ def conjugate(self, out=None):

def astype(self, dtype, out=None):
assert out is None
return self.elemwise(np.ndarray.astype, dtype, check=False)

def __gt__(self, other):
if not isinstance(other, numbers.Number):
raise NotImplementedError("Only scalars supported")
if other < 0:
raise ValueError("Comparison with negative number would produce "
"dense result")
return self.elemwise(operator.gt, other)

def __ge__(self, other):
if not isinstance(other, numbers.Number):
raise NotImplementedError("Only scalars supported")
if other <= 0:
raise ValueError("Comparison with negative number would produce "
"dense result")
return self.elemwise(operator.ge, other)
return self.elemwise(np.ndarray.astype, dtype)

def maybe_densify(self, allowed_nnz=1e3, allowed_fraction=0.25):
""" Convert to a dense numpy array if not too costly. Err othrewise """
Expand Down
102 changes: 84 additions & 18 deletions sparse/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,10 @@ def test_elemwise(func):
assert_eq(func(x), func(s))


@pytest.mark.parametrize('func', [operator.mul, operator.add])
@pytest.mark.parametrize('func', [
operator.mul, operator.add, operator.sub, operator.gt,
operator.lt, operator.ne
])
@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
def test_elemwise_binary(func, shape):
x = random_x(shape)
Expand All @@ -197,6 +200,80 @@ def test_elemwise_binary(func, shape):
assert_eq(func(xs, ys), func(x, y))


@pytest.mark.parametrize('func', [
    operator.pow, operator.truediv, operator.floordiv,
    operator.ge, operator.le, operator.eq
])
@pytest.mark.filterwarnings('ignore:divide by zero')
@pytest.mark.filterwarnings('ignore:invalid value')
def test_auto_densification_fails(func):
    """Each parametrized binary operator must raise on two COO operands.

    Both operands are built from independent ``random_x`` draws of shape
    ``(2, 3, 4)``; applying ``func`` to them is expected to raise
    ``ValueError``.
    """
    shape = (2, 3, 4)
    # Keep the two draws in this order so the random stream is consumed
    # exactly as before.
    lhs = COO.from_numpy(random_x(shape))
    rhs = COO.from_numpy(random_x(shape))

    with pytest.raises(ValueError):
        func(lhs, rhs)


def test_op_scipy_sparse():
    """Adding a ``scipy.sparse`` CSR matrix to a COO matches dense addition."""
    dense_left = random_x((3, 4))
    dense_right = random_x((3, 4))

    coo_left = COO.from_numpy(dense_left)
    csr_right = scipy.sparse.csr_matrix(dense_right)

    expected = dense_left + dense_right
    assert_eq(expected, coo_left + csr_right)


@pytest.mark.parametrize('func, scalar', [
    (operator.mul, 5),
    (operator.add, 0),
    (operator.sub, 0),
    (operator.pow, 5),
    (operator.truediv, 3),
    (operator.floordiv, 4),
    (operator.gt, 5),
    (operator.lt, -5),
    (operator.ne, 0),
    (operator.ge, 5),
    (operator.le, -3),
    (operator.eq, 1)
])
def test_elemwise_scalar(func, scalar):
    """Check COO-with-scalar operations against their dense counterpart.

    For each ``(func, scalar)`` pair the result on the COO array must be a
    ``COO``, must not report a larger ``nnz`` than the input, and must
    compare equal (via ``assert_eq``) to the same operation on the dense
    array.
    """
    dense = random_x((2, 3, 4))
    sparse = COO.from_numpy(dense)

    sparse_result = func(sparse, scalar)

    assert isinstance(sparse_result, COO)
    assert sparse_result.nnz <= sparse.nnz

    assert_eq(sparse_result, func(dense, scalar))


@pytest.mark.parametrize('func, scalar', [
    (operator.add, 5),
    (operator.sub, -5),
    (operator.pow, -3),
    (operator.truediv, 0),
    (operator.floordiv, 0),
    (operator.gt, -5),
    (operator.lt, 5),
    (operator.ne, 1),
    (operator.ge, -3),
    (operator.le, 3),
    (operator.eq, 0)
])
@pytest.mark.filterwarnings('ignore:divide by zero')
@pytest.mark.filterwarnings('ignore:invalid value')
def test_scalar_densification_fails(func, scalar):
    """Each parametrized ``(func, scalar)`` pair must raise ``ValueError``.

    The operator is applied to a COO array built from a ``(2, 3, 4)``
    ``random_x`` draw and the given scalar.
    """
    sparse = COO.from_numpy(random_x((2, 3, 4)))

    with pytest.raises(ValueError):
        func(sparse, scalar)


@pytest.mark.parametrize('func', [operator.and_, operator.or_, operator.xor])
@pytest.mark.parametrize('shape', [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)])
def test_bitwise_binary(func, shape):
Expand Down Expand Up @@ -399,30 +476,19 @@ def test_addition():

assert_eq(x + y, a + b)
assert_eq(x - y, a - b)
assert_eq(-x, -a)


def test_addition_ok_when_mostly_dense():
x = np.arange(10)
y = COO.from_numpy(x)

assert_eq(x + 1, y + 1)
assert_eq(x - 1, y - 1)
assert_eq(1 - x, 1 - y)
assert_eq(np.exp(x), np.exp(y))


def test_addition_not_ok_when_large_and_sparse():
x = COO({(0, 0): 1}, shape=(1000000, 1000000))
with pytest.raises(Exception):
with pytest.raises(ValueError):
x + 1
with pytest.raises(Exception):
with pytest.raises(ValueError):
1 + x
with pytest.raises(Exception):
with pytest.raises(ValueError):
1 - x
with pytest.raises(Exception):
with pytest.raises(ValueError):
x - 1
with pytest.raises(Exception):
with pytest.raises(ValueError):
np.exp(x)


Expand Down Expand Up @@ -537,7 +603,7 @@ def test_cache_csr():


def test_empty_shape():
x = COO([], [1.0])
x = COO(np.empty((0, 1), dtype=np.int8), [1.0])
assert x.shape == ()
assert ((2 * x).todense() == np.array(2.0)).all()

Expand Down