Skip to content

Support of alternative array classes: ndarray-like #305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 15, 2022
Merged
5 changes: 4 additions & 1 deletion docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@ Unreleased
By :user:`Haiying Xu <halehawk>`, :user:`John Kirkham <jakirkham>`, :user:`Ryan Abernathey <rabernat>`,
:issue:`303`.

.. _release_0.9.1:
* Add support of alternative array classes (other than NumPy arrays)
By :user:`Mads R. B. Kristensen <madsbk>`, :issue:`305`.

* Add ability to find codecs via entrypoints
By :user:`Martin Durant <martindurant>`, :issue:`290`.

.. _release_0.9.1:

0.9.1
-----

Expand Down
158 changes: 104 additions & 54 deletions numcodecs/compat.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,120 @@
# flake8: noqa
import functools
import sys
import codecs
import array
from functools import reduce

import numpy as np

from .ndarray_like import NDArrayLike, is_ndarray_like

def ensure_ndarray(buf):
"""Convenience function to coerce `buf` to a numpy array, if it is not already a
numpy array.

def ensure_ndarray_like(buf) -> NDArrayLike:
"""Convenience function to coerce `buf` to ndarray-like array.

Parameters
----------
buf : array-like or bytes-like
A numpy array or any object exporting a buffer interface.
buf : ndarray-like, array-like, or bytes-like
A numpy array like object such as numpy.ndarray, cupy.ndarray, or
any object exporting a buffer interface.

Returns
-------
arr : ndarray
A numpy array, sharing memory with `buf`.
arr : NDArrayLike
A ndarray-like, sharing memory with `buf`.

Notes
-----
This function will not create a copy under any circumstances, it is guaranteed to
return a view on memory exported by `buf`.

"""

if isinstance(buf, np.ndarray):
# already a numpy array
arr = buf
if not is_ndarray_like(buf):
if isinstance(buf, array.array) and buf.typecode in "cu":
# Guard condition, do not support array.array with unicode type, this is
# problematic because numpy does not support it on all platforms. Also do not
# support char as it was removed in Python 3.
raise TypeError("array.array with char or unicode type is not supported")
else:
# N.B., first take a memoryview to make sure that we subsequently create a
# numpy array from a memory buffer with no copy
mem = memoryview(buf)
# instantiate array from memoryview, ensures no copy
buf = np.array(mem, copy=False)
return buf

elif isinstance(buf, array.array) and buf.typecode in 'cu':
# Guard condition, do not support array.array with unicode type, this is
# problematic because numpy does not support it on all platforms. Also do not
# support char as it was removed in Python 3.
raise TypeError('array.array with char or unicode type is not supported')

else:
def ensure_ndarray(buf) -> np.ndarray:
"""Convenience function to coerce `buf` to a numpy array, if it is not already a
numpy array.

# N.B., first take a memoryview to make sure that we subsequently create a
# numpy array from a memory buffer with no copy
mem = memoryview(buf)
Parameters
----------
buf : array-like or bytes-like
A numpy array or any object exporting a buffer interface.

# instantiate array from memoryview, ensures no copy
arr = np.array(mem, copy=False)
Returns
-------
arr : ndarray
A numpy array, sharing memory with `buf`.

return arr
Notes
-----
This function will not create a copy under any circumstances, it is guaranteed to
return a view on memory exported by `buf`.
"""
return np.array(ensure_ndarray_like(buf), copy=False)


def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True):
"""Convenience function to coerce `buf` to a numpy array, if it is not already a
numpy array. Also ensures that the returned value exports fully contiguous memory,
def ensure_contiguous_ndarray_like(
buf, max_buffer_size=None, flatten=True
) -> NDArrayLike:
"""Convenience function to coerce `buf` to ndarray-like array.
Also ensures that the returned value exports fully contiguous memory,
and supports the new-style buffer interface. If the optional max_buffer_size is
provided, raise a ValueError if the number of bytes consumed by the returned
array exceeds this value.

Parameters
----------
buf : array-like or bytes-like
A numpy array or any object exporting a buffer interface.
buf : ndarray-like, array-like, or bytes-like
A numpy array like object such as numpy.ndarray, cupy.ndarray, or
any object exporting a buffer interface.
max_buffer_size : int
If specified, the largest allowable value of arr.nbytes, where arr
is the returned array.
flatten : bool
If True, the array are flatten.

Returns
-------
arr : ndarray
A numpy array, sharing memory with `buf`.
arr : NDArrayLike
A ndarray-like, sharing memory with `buf`.

Notes
-----
This function will not create a copy under any circumstances, it is guaranteed to
return a view on memory exported by `buf`.

"""

# ensure input is a numpy array
arr = ensure_ndarray(buf)
arr = ensure_ndarray_like(buf)

# check for object arrays, these are just memory pointers, actual memory holding
# item data is scattered elsewhere
if arr.dtype == object:
raise TypeError('object arrays are not supported')
raise TypeError("object arrays are not supported")

# check for datetime or timedelta ndarray, the buffer interface doesn't support those
if arr.dtype.kind in 'Mm':
if arr.dtype.kind in "Mm":
arr = arr.view(np.int64)

# check memory is contiguous, if so flatten
if arr.flags.c_contiguous or arr.flags.f_contiguous:
# check if flatten flag is on or not
if flatten:
# can flatten without copy
arr = arr.reshape(-1, order='A')

arr = arr.reshape(-1, order="A")
else:
raise ValueError('an array with contiguous memory is required')
raise ValueError("an array with contiguous memory is required")

if max_buffer_size is not None and arr.nbytes > max_buffer_size:
msg = "Codec does not support buffers of > {} bytes".format(max_buffer_size)
Expand All @@ -106,45 +123,78 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True):
return arr


def ensure_bytes(buf):
def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.ndarray:
    """Convenience function to coerce `buf` to a numpy array, if it is not already a
    numpy array. Also ensures that the returned value exports fully contiguous memory,
    and supports the new-style buffer interface. If the optional max_buffer_size is
    provided, raise a ValueError if the number of bytes consumed by the returned
    array exceeds this value.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the array is flattened.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Raises
    ------
    TypeError
        Propagated from `ensure_contiguous_ndarray_like` for object arrays.
    ValueError
        Propagated from `ensure_contiguous_ndarray_like` when the memory is not
        contiguous or the array is larger than `max_buffer_size`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """

    # All validation (dtype, contiguity, size limit) happens in the ndarray-like
    # variant; this wrapper only coerces its result to a NumPy array.
    return ensure_ndarray(
        ensure_contiguous_ndarray_like(
            buf, max_buffer_size=max_buffer_size, flatten=flatten
        )
    )


def ensure_bytes(buf) -> bytes:
"""Obtain a bytes object from memory exposed by `buf`."""

if not isinstance(buf, bytes):

# go via numpy, for convenience
arr = ensure_ndarray(buf)
arr = ensure_ndarray_like(buf)

# check for object arrays, these are just memory pointers,
# actual memory holding item data is scattered elsewhere
if arr.dtype == object:
raise TypeError('object arrays are not supported')
raise TypeError("object arrays are not supported")

# create bytes
buf = arr.tobytes(order='A')
buf = arr.tobytes(order="A")

return buf


def ensure_text(s, encoding='utf-8'):
def ensure_text(s, encoding="utf-8"):
if not isinstance(s, str):
s = ensure_contiguous_ndarray(s)
s = codecs.decode(s, encoding)
return s


def ndarray_copy(src, dst):
def ndarray_copy(src, dst) -> NDArrayLike:
"""Copy the contents of the array from `src` to `dst`."""

if dst is None:
# no-op
return src

# ensure ndarrays
src = ensure_ndarray(src)
dst = ensure_ndarray(dst)
# ensure ndarray like
src = ensure_ndarray_like(src)
dst = ensure_ndarray_like(dst)

# flatten source array
src = src.reshape(-1, order='A')
src = src.reshape(-1, order="A")

# ensure same data type
if dst.dtype != object:
Expand All @@ -153,9 +203,9 @@ def ndarray_copy(src, dst):
# reshape source to match destination
if src.shape != dst.shape:
if dst.flags.f_contiguous:
order = 'F'
order = "F"
else:
order = 'C'
order = "C"
src = src.reshape(dst.shape, order=order)

# copy via numpy
Expand Down
56 changes: 56 additions & 0 deletions numcodecs/ndarray_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import sys
from typing import Any, Optional, Tuple

if sys.version_info >= (3, 8):
from typing import Protocol, runtime_checkable
else:
from typing_extensions import Protocol, runtime_checkable


@runtime_checkable
class DType(Protocol):
    """Structural type for the dtype object exposed by an ndarray-like.

    Because the protocol is ``runtime_checkable``, an ``isinstance`` check only
    verifies that the attributes declared below are present.
    """

    itemsize: int
    name: str
    kind: str


@runtime_checkable
class FlagsObj(Protocol):
    """Structural type for the ``flags`` object exposed by an ndarray-like.

    Because the protocol is ``runtime_checkable``, an ``isinstance`` check only
    verifies that the attributes declared below are present.
    """

    c_contiguous: bool
    f_contiguous: bool
    owndata: bool


@runtime_checkable
class NDArrayLike(Protocol):
    """Structural (duck-typed) interface for ndarray-like objects.

    Because the protocol is ``runtime_checkable``, ``isinstance(obj, NDArrayLike)``
    only checks that every member declared below exists on ``obj``; attribute
    types and method signatures are not validated at runtime.
    """

    dtype: DType
    shape: Tuple[int, ...]
    strides: Tuple[int, ...]
    ndim: int
    size: int
    itemsize: int
    nbytes: int
    flags: FlagsObj

    def __len__(self) -> int:
        ...

    def __getitem__(self, key) -> Any:
        ...

    def __setitem__(self, key, value):
        ...

    def tobytes(self, order: Optional[str] = ...) -> bytes:
        ...

    # NOTE(review): annotated as ``*shape: int``, but code elsewhere in this
    # change passes a shape tuple (``src.reshape(dst.shape, order=order)``) --
    # confirm whether the annotation should also admit a tuple.
    def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike":
        ...

    def view(self, dtype: DType = ...) -> "NDArrayLike":
        ...


def is_ndarray_like(obj: object) -> bool:
    """Return True when `obj` is ndarray-like.

    This is a runtime ``isinstance`` check against the `NDArrayLike` protocol.
    """
    return isinstance(obj, NDArrayLike)
15 changes: 15 additions & 0 deletions numcodecs/tests/test_ndarray_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytest

from numcodecs.ndarray_like import NDArrayLike


@pytest.mark.parametrize("module", ["numpy", "cupy"])
def test_is_ndarray_like(module):
    """Arrays created by each array library satisfy the NDArrayLike protocol."""
    # Skip (rather than fail) when the array library is not installed.
    m = pytest.importorskip(module)
    a = m.arange(10)
    assert isinstance(a, NDArrayLike)


def test_is_not_ndarray_like():
    """Plain lists and bytes objects do not satisfy the NDArrayLike protocol."""
    assert not isinstance([1, 2, 3], NDArrayLike)
    assert not isinstance(b"1,2,3", NDArrayLike)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ numpy
msgpack
pytest
zfpy
typing-extensions
3 changes: 1 addition & 2 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ Cython==0.29.21
msgpack==1.0.2
numpy==1.21.0
zfpy==0.5.5


typing-extensions>=3.7.4
1 change: 1 addition & 0 deletions requirements_rtfd.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ mock
numpy
cython
zfpy==0.5.5
typing-extensions
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ def run_setup(with_extensions):
],
install_requires=[
'numpy>=1.7',
'typing-extensions>=3.7.4',
],
extras_require={
'msgpack': ["msgpack"],
Expand Down