diff --git a/docs/release.rst b/docs/release.rst index d9473f65..bf2daa79 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -10,11 +10,14 @@ Unreleased By :user:`Haiying Xu `, `John Kirkham `, `Ryan Abernathey ` : issue:`303`. -.. _release_0.9.1: +* Add support for alternative array classes (other than NumPy arrays) + By :user:`Mads R. B. Kristensen `, :issue:`305`. * Add ability to find codecs via entrypoints By :user:`Martin Durant `, :issue:`290`. +.. _release_0.9.1: + 0.9.1 ----- diff --git a/numcodecs/compat.py b/numcodecs/compat.py index b1187838..2b9f776f 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -1,4 +1,5 @@ # flake8: noqa +import functools import sys import codecs import array @@ -6,98 +7,114 @@ import numpy as np +from .ndarray_like import NDArrayLike, is_ndarray_like -def ensure_ndarray(buf): - """Convenience function to coerce `buf` to a numpy array, if it is not already a - numpy array. + +def ensure_ndarray_like(buf) -> NDArrayLike: + """Convenience function to coerce `buf` to an ndarray-like array. Parameters ---------- - buf : array-like or bytes-like - A numpy array or any object exporting a buffer interface. + buf : ndarray-like, array-like, or bytes-like + A numpy array like object such as numpy.ndarray, cupy.ndarray, or + any object exporting a buffer interface. Returns ------- - arr : ndarray - A numpy array, sharing memory with `buf`. + arr : NDArrayLike + An ndarray-like, sharing memory with `buf`. Notes ----- This function will not create a copy under any circumstances, it is guaranteed to return a view on memory exported by `buf`. - """ - if isinstance(buf, np.ndarray): - # already a numpy array - arr = buf + if not is_ndarray_like(buf): + if isinstance(buf, array.array) and buf.typecode in "cu": + # Guard condition, do not support array.array with unicode type, this is + # problematic because numpy does not support it on all platforms. Also do not + # support char as it was removed in Python 3. 
+ raise TypeError("array.array with char or unicode type is not supported") + else: + # N.B., first take a memoryview to make sure that we subsequently create a + # numpy array from a memory buffer with no copy + mem = memoryview(buf) + # instantiate array from memoryview, ensures no copy + buf = np.array(mem, copy=False) + return buf - elif isinstance(buf, array.array) and buf.typecode in 'cu': - # Guard condition, do not support array.array with unicode type, this is - # problematic because numpy does not support it on all platforms. Also do not - # support char as it was removed in Python 3. - raise TypeError('array.array with char or unicode type is not supported') - else: +def ensure_ndarray(buf) -> np.ndarray: + """Convenience function to coerce `buf` to a numpy array, if it is not already a + numpy array. - # N.B., first take a memoryview to make sure that we subsequently create a - # numpy array from a memory buffer with no copy - mem = memoryview(buf) + Parameters + ---------- + buf : array-like or bytes-like + A numpy array or any object exporting a buffer interface. - # instantiate array from memoryview, ensures no copy - arr = np.array(mem, copy=False) + Returns + ------- + arr : ndarray + A numpy array, sharing memory with `buf`. - return arr + Notes + ----- + This function will not create a copy under any circumstances, it is guaranteed to + return a view on memory exported by `buf`. + """ + return np.array(ensure_ndarray_like(buf), copy=False) -def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True): - """Convenience function to coerce `buf` to a numpy array, if it is not already a - numpy array. Also ensures that the returned value exports fully contiguous memory, +def ensure_contiguous_ndarray_like( + buf, max_buffer_size=None, flatten=True +) -> NDArrayLike: + """Convenience function to coerce `buf` to an ndarray-like array. 
+ Also ensures that the returned value exports fully contiguous memory, and supports the new-style buffer interface. If the optional max_buffer_size is provided, raise a ValueError if the number of bytes consumed by the returned array exceeds this value. Parameters ---------- - buf : array-like or bytes-like - A numpy array or any object exporting a buffer interface. + buf : ndarray-like, array-like, or bytes-like + A numpy array like object such as numpy.ndarray, cupy.ndarray, or + any object exporting a buffer interface. max_buffer_size : int If specified, the largest allowable value of arr.nbytes, where arr is the returned array. + flatten : bool + If True, the returned array is flattened to one dimension. Returns ------- - arr : ndarray - A numpy array, sharing memory with `buf`. + arr : NDArrayLike + An ndarray-like, sharing memory with `buf`. Notes ----- This function will not create a copy under any circumstances, it is guaranteed to return a view on memory exported by `buf`. - """ - - # ensure input is a numpy array - arr = ensure_ndarray(buf) + arr = ensure_ndarray_like(buf) # check for object arrays, these are just memory pointers, actual memory holding # item data is scattered elsewhere if arr.dtype == object: - raise TypeError('object arrays are not supported') + raise TypeError("object arrays are not supported") # check for datetime or timedelta ndarray, the buffer interface doesn't support those - if arr.dtype.kind in 'Mm': + if arr.dtype.kind in "Mm": arr = arr.view(np.int64) # check memory is contiguous, if so flatten if arr.flags.c_contiguous or arr.flags.f_contiguous: - # check if flatten flag is on or not if flatten: # can flatten without copy - arr = arr.reshape(-1, order='A') - + arr = arr.reshape(-1, order="A") else: - raise ValueError('an array with contiguous memory is required') + raise ValueError("an array with contiguous memory is required") if max_buffer_size is not None and arr.nbytes > max_buffer_size: msg = "Codec does not support buffers of > {} 
bytes".format(max_buffer_size) @@ -106,45 +123,78 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True): return arr -def ensure_bytes(buf): +def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.ndarray: + """Convenience function to coerce `buf` to a numpy array, if it is not already a + numpy array. Also ensures that the returned value exports fully contiguous memory, + and supports the new-style buffer interface. If the optional max_buffer_size is + provided, raise a ValueError if the number of bytes consumed by the returned + array exceeds this value. + + Parameters + ---------- + buf : array-like or bytes-like + A numpy array or any object exporting a buffer interface. + max_buffer_size : int + If specified, the largest allowable value of arr.nbytes, where arr + is the returned array. + flatten : bool + If True, the returned array is flattened to one dimension. + + Returns + ------- + arr : ndarray + A numpy array, sharing memory with `buf`. + + Notes + ----- + This function will not create a copy under any circumstances, it is guaranteed to + return a view on memory exported by `buf`. 
+ """ + + return ensure_ndarray( + ensure_contiguous_ndarray_like( + buf, max_buffer_size=max_buffer_size, flatten=flatten + ) + ) + + +def ensure_bytes(buf) -> bytes: """Obtain a bytes object from memory exposed by `buf`.""" if not isinstance(buf, bytes): - - # go via numpy, for convenience - arr = ensure_ndarray(buf) + arr = ensure_ndarray_like(buf) # check for object arrays, these are just memory pointers, # actual memory holding item data is scattered elsewhere if arr.dtype == object: - raise TypeError('object arrays are not supported') + raise TypeError("object arrays are not supported") # create bytes - buf = arr.tobytes(order='A') + buf = arr.tobytes(order="A") return buf -def ensure_text(s, encoding='utf-8'): +def ensure_text(s, encoding="utf-8"): if not isinstance(s, str): s = ensure_contiguous_ndarray(s) s = codecs.decode(s, encoding) return s -def ndarray_copy(src, dst): +def ndarray_copy(src, dst) -> NDArrayLike: """Copy the contents of the array from `src` to `dst`.""" if dst is None: # no-op return src - # ensure ndarrays - src = ensure_ndarray(src) - dst = ensure_ndarray(dst) + # ensure ndarray like + src = ensure_ndarray_like(src) + dst = ensure_ndarray_like(dst) # flatten source array - src = src.reshape(-1, order='A') + src = src.reshape(-1, order="A") # ensure same data type if dst.dtype != object: @@ -153,9 +203,9 @@ def ndarray_copy(src, dst): # reshape source to match destination if src.shape != dst.shape: if dst.flags.f_contiguous: - order = 'F' + order = "F" else: - order = 'C' + order = "C" src = src.reshape(dst.shape, order=order) # copy via numpy diff --git a/numcodecs/ndarray_like.py b/numcodecs/ndarray_like.py new file mode 100644 index 00000000..2ff00772 --- /dev/null +++ b/numcodecs/ndarray_like.py @@ -0,0 +1,56 @@ +import sys +from typing import Any, Optional, Tuple + +if sys.version_info >= (3, 8): + from typing import Protocol, runtime_checkable +else: + from typing_extensions import Protocol, runtime_checkable + + 
+@runtime_checkable +class DType(Protocol): + itemsize: int + name: str + kind: str + + +@runtime_checkable +class FlagsObj(Protocol): + c_contiguous: bool + f_contiguous: bool + owndata: bool + + +@runtime_checkable +class NDArrayLike(Protocol): + dtype: DType + shape: Tuple[int, ...] + strides: Tuple[int, ...] + ndim: int + size: int + itemsize: int + nbytes: int + flags: FlagsObj + + def __len__(self) -> int: + ... + + def __getitem__(self, key) -> Any: + ... + + def __setitem__(self, key, value): + ... + + def tobytes(self, order: Optional[str] = ...) -> bytes: + ... + + def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": + ... + + def view(self, dtype: DType = ...) -> "NDArrayLike": + ... + + +def is_ndarray_like(obj: object) -> bool: + """Return True when `obj` is ndarray-like""" + return isinstance(obj, NDArrayLike) diff --git a/numcodecs/tests/test_ndarray_like.py b/numcodecs/tests/test_ndarray_like.py new file mode 100644 index 00000000..d4e63963 --- /dev/null +++ b/numcodecs/tests/test_ndarray_like.py @@ -0,0 +1,15 @@ +import pytest + +from numcodecs.ndarray_like import NDArrayLike + + +@pytest.mark.parametrize("module", ["numpy", "cupy"]) +def test_is_ndarray_like(module): + m = pytest.importorskip(module) + a = m.arange(10) + assert isinstance(a, NDArrayLike) + + +def test_is_not_ndarray_like(): + assert not isinstance([1, 2, 3], NDArrayLike) + assert not isinstance(b"1,2,3", NDArrayLike) diff --git a/requirements.txt b/requirements.txt index 3fe95a08..86d154b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ numpy msgpack pytest zfpy +typing-extensions diff --git a/requirements_dev.txt b/requirements_dev.txt index 1717cc90..e9601702 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,5 +2,4 @@ Cython==0.29.21 msgpack==1.0.2 numpy==1.21.0 zfpy==0.5.5 - - +typing-extensions>=3.7.4 diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt index 8854bded..f6b12f27 100644 --- a/requirements_rtfd.txt +++ 
b/requirements_rtfd.txt @@ -7,3 +7,4 @@ mock numpy cython zfpy==0.5.5 +typing-extensions diff --git a/setup.py b/setup.py index c8622fd6..541c11ac 100644 --- a/setup.py +++ b/setup.py @@ -338,6 +338,7 @@ def run_setup(with_extensions): ], install_requires=[ 'numpy>=1.7', + 'typing-extensions>=3.7.4', ], extras_require={ 'msgpack': ["msgpack"],