From b909eb97ea64ede5d55b860ab061de36d7ca0fc3 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 9 Feb 2022 14:41:52 +0100 Subject: [PATCH 1/6] Implement NDArrayLike --- numcodecs/compat.py | 83 +++++++++++++++++++++------------------ numcodecs/ndarray_like.py | 79 +++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 39 deletions(-) create mode 100644 numcodecs/ndarray_like.py diff --git a/numcodecs/compat.py b/numcodecs/compat.py index f414470b..81f17852 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -6,8 +6,10 @@ import numpy as np +from .ndarray_like import NDArrayLike, ensure_memtype -def ensure_ndarray(buf): + +def ensure_ndarray(buf, memtype="cpu") -> NDArrayLike: """Convenience function to coerce `buf` to a numpy array, if it is not already a numpy array. @@ -27,30 +29,28 @@ def ensure_ndarray(buf): return a view on memory exported by `buf`. """ + if not isinstance(buf, NDArrayLike): + if isinstance(buf, array.array) and buf.typecode in "cu": + # Guard condition, do not support array.array with unicode type, this is + # problematic because numpy does not support it on all platforms. Also do not + # support char as it was removed in Python 3. + raise TypeError("array.array with char or unicode type is not supported") + else: + # N.B., first take a memoryview to make sure that we subsequently create a + # numpy array from a memory buffer with no copy + mem = memoryview(buf) - if isinstance(buf, np.ndarray): - # already a numpy array - arr = buf - - elif isinstance(buf, array.array) and buf.typecode in 'cu': - # Guard condition, do not support array.array with unicode type, this is - # problematic because numpy does not support it on all platforms. Also do not - # support char as it was removed in Python 3. 
- raise TypeError('array.array with char or unicode type is not supported') - - else: + # instantiate array from memoryview, ensures no copy + buf = np.array(mem, copy=False) - # N.B., first take a memoryview to make sure that we subsequently create a - # numpy array from a memory buffer with no copy - mem = memoryview(buf) + return ensure_memtype(buf, memtype=memtype) - # instantiate array from memoryview, ensures no copy - arr = np.array(mem, copy=False) - return arr +def ensure_ndarray_like(buf, memtype=None) -> NDArrayLike: + return ensure_ndarray(buf, memtype=memtype) -def ensure_contiguous_ndarray(buf, max_buffer_size=None): +def ensure_contiguous_ndarray(buf, max_buffer_size=None, memtype="cpu") -> NDArrayLike: """Convenience function to coerce `buf` to a numpy array, if it is not already a numpy array. Also ensures that the returned value exports fully contiguous memory, and supports the new-style buffer interface. If the optional max_buffer_size is @@ -78,24 +78,23 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None): """ # ensure input is a numpy array - arr = ensure_ndarray(buf) + arr = ensure_ndarray(buf, memtype=memtype) # check for object arrays, these are just memory pointers, actual memory holding # item data is scattered elsewhere if arr.dtype == object: - raise TypeError('object arrays are not supported') + raise TypeError("object arrays are not supported") # check for datetime or timedelta ndarray, the buffer interface doesn't support those - if arr.dtype.kind in 'Mm': + if arr.dtype.kind in "Mm": arr = arr.view(np.int64) # check memory is contiguous, if so flatten if arr.flags.c_contiguous or arr.flags.f_contiguous: # can flatten without copy - arr = arr.reshape(-1, order='A') - + arr = arr.reshape(-1, order="A") else: - raise ValueError('an array with contiguous memory is required') + raise ValueError("an array with contiguous memory is required") if max_buffer_size is not None and arr.nbytes > max_buffer_size: msg = "Codec does not 
support buffers of > {} bytes".format(max_buffer_size) @@ -104,45 +103,51 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None): return arr -def ensure_bytes(buf): +def ensure_contiguous_ndarray_like( + buf, max_buffer_size=None, memtype=None +) -> NDArrayLike: + return ensure_contiguous_ndarray( + buf, max_buffer_size=max_buffer_size, memtype=memtype + ) + + +def ensure_bytes(buf) -> bytes: """Obtain a bytes object from memory exposed by `buf`.""" if not isinstance(buf, bytes): - - # go via numpy, for convenience - arr = ensure_ndarray(buf) + arr = ensure_ndarray_like(buf) # check for object arrays, these are just memory pointers, # actual memory holding item data is scattered elsewhere if arr.dtype == object: - raise TypeError('object arrays are not supported') + raise TypeError("object arrays are not supported") # create bytes - buf = arr.tobytes(order='A') + buf = arr.tobytes(order="A") return buf -def ensure_text(s, encoding='utf-8'): +def ensure_text(s, encoding="utf-8"): if not isinstance(s, str): s = ensure_contiguous_ndarray(s) s = codecs.decode(s, encoding) return s -def ndarray_copy(src, dst): +def ndarray_copy(src, dst) -> NDArrayLike: """Copy the contents of the array from `src` to `dst`.""" if dst is None: # no-op return src - # ensure ndarrays - src = ensure_ndarray(src) - dst = ensure_ndarray(dst) + # ensure ndarray like + src = ensure_ndarray_like(src) + dst = ensure_ndarray_like(dst) # flatten source array - src = src.reshape(-1, order='A') + src = src.reshape(-1, order="A") # ensure same data type if dst.dtype != object: @@ -151,9 +156,9 @@ def ndarray_copy(src, dst): # reshape source to match destination if src.shape != dst.shape: if dst.flags.f_contiguous: - order = 'F' + order = "F" else: - order = 'C' + order = "C" src = src.reshape(dst.shape, order=order) # copy via numpy diff --git a/numcodecs/ndarray_like.py b/numcodecs/ndarray_like.py new file mode 100644 index 00000000..15773998 --- /dev/null +++ b/numcodecs/ndarray_like.py @@ -0,0 
+1,79 @@ +import sys +from collections import defaultdict +from typing import Any, Callable, DefaultDict, Dict, Mapping, Optional, Tuple + +import numpy as np + +if sys.version_info >= (3, 8): + from typing import Protocol, runtime_checkable +else: + from typing_extensions import Protocol, runtime_checkable + + +@runtime_checkable +class DType(Protocol): + itemsize: int + name: str + kind: str + + +@runtime_checkable +class FlagsObj(Protocol): + c_contiguous: bool + f_contiguous: bool + owndata: bool + + +@runtime_checkable +class NDArrayLike(Protocol): + dtype: DType + shape: Tuple[int, ...] + strides: Tuple[int, ...] + ndim: int + size: int + itemsize: int + nbytes: int + flags: FlagsObj + + def __len__(self) -> int: + ... + + def __getitem__(self, key) -> Any: + ... + + def __setitem__(self, key, value): + ... + + def tobytes(self, order: Optional[str] = ...) -> bytes: + ... + + def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": + ... + + def view(self, dtype: DType = ...) -> "NDArrayLike": + ... + + +ConvertFunc = Callable[[NDArrayLike], NDArrayLike] + +_ndarray_like_registry: DefaultDict[type, Dict[str, ConvertFunc]] = defaultdict(dict) + + +def register_ndarray_like(cls, convert_dict: Mapping[str, ConvertFunc]) -> None: + _ndarray_like_registry[cls].update(convert_dict) + + +def ensure_memtype(ary: NDArrayLike, memtype=Optional[str]) -> NDArrayLike: + if memtype is None: + return ary + return _ndarray_like_registry[ary.__class__][memtype](ary) + + +register_ndarray_like(np.ndarray, {"cpu": lambda x: x}) + +try: + import cupy +except ImportError: + pass +else: + register_ndarray_like(cupy.ndarray, {"cpu": cupy.asnumpy}) From a05bdac3c15548939adb4f9ce56c31b494f806a1 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Wed, 9 Feb 2022 15:02:16 +0100 Subject: [PATCH 2/6] small test --- numcodecs/tests/test_ndarray_like.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 numcodecs/tests/test_ndarray_like.py diff --git a/numcodecs/tests/test_ndarray_like.py b/numcodecs/tests/test_ndarray_like.py new file mode 100644 index 00000000..d4e63963 --- /dev/null +++ b/numcodecs/tests/test_ndarray_like.py @@ -0,0 +1,15 @@ +import pytest + +from numcodecs.ndarray_like import NDArrayLike + + +@pytest.mark.parametrize("module", ["numpy", "cupy"]) +def test_is_ndarray_like(module): + m = pytest.importorskip(module) + a = m.arange(10) + assert isinstance(a, NDArrayLike) + + +def test_is_not_ndarray_like(): + assert not isinstance([1, 2, 3], NDArrayLike) + assert not isinstance(b"1,2,3", NDArrayLike) From efc8785347bfa7868d1ba274b769edd127e46cd4 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 9 Feb 2022 16:37:06 +0100 Subject: [PATCH 3/6] removing memtype --- numcodecs/compat.py | 25 +++++++++++-------------- numcodecs/ndarray_like.py | 31 ++++--------------------------- 2 files changed, 15 insertions(+), 41 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index 81f17852..c631f58f 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -1,4 +1,5 @@ # flake8: noqa +import functools import sys import codecs import array @@ -6,10 +7,10 @@ import numpy as np -from .ndarray_like import NDArrayLike, ensure_memtype +from .ndarray_like import NDArrayLike, is_ndarray_like -def ensure_ndarray(buf, memtype="cpu") -> NDArrayLike: +def ensure_ndarray(buf) -> NDArrayLike: """Convenience function to coerce `buf` to a numpy array, if it is not already a numpy array. @@ -29,7 +30,7 @@ def ensure_ndarray(buf, memtype="cpu") -> NDArrayLike: return a view on memory exported by `buf`. 
""" - if not isinstance(buf, NDArrayLike): + if not is_ndarray_like(buf): if isinstance(buf, array.array) and buf.typecode in "cu": # Guard condition, do not support array.array with unicode type, this is # problematic because numpy does not support it on all platforms. Also do not @@ -43,14 +44,14 @@ def ensure_ndarray(buf, memtype="cpu") -> NDArrayLike: # instantiate array from memoryview, ensures no copy buf = np.array(mem, copy=False) - return ensure_memtype(buf, memtype=memtype) + return buf -def ensure_ndarray_like(buf, memtype=None) -> NDArrayLike: - return ensure_ndarray(buf, memtype=memtype) +def ensure_ndarray_like(buf) -> NDArrayLike: + return ensure_ndarray(buf) -def ensure_contiguous_ndarray(buf, max_buffer_size=None, memtype="cpu") -> NDArrayLike: +def ensure_contiguous_ndarray(buf, max_buffer_size=None) -> NDArrayLike: """Convenience function to coerce `buf` to a numpy array, if it is not already a numpy array. Also ensures that the returned value exports fully contiguous memory, and supports the new-style buffer interface. 
If the optional max_buffer_size is @@ -78,7 +79,7 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None, memtype="cpu") -> NDArr """ # ensure input is a numpy array - arr = ensure_ndarray(buf, memtype=memtype) + arr = ensure_ndarray(buf) # check for object arrays, these are just memory pointers, actual memory holding # item data is scattered elsewhere @@ -103,12 +104,8 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None, memtype="cpu") -> NDArr return arr -def ensure_contiguous_ndarray_like( - buf, max_buffer_size=None, memtype=None -) -> NDArrayLike: - return ensure_contiguous_ndarray( - buf, max_buffer_size=max_buffer_size, memtype=memtype - ) +def ensure_contiguous_ndarray_like(buf, max_buffer_size=None) -> NDArrayLike: + return ensure_contiguous_ndarray(buf, max_buffer_size=max_buffer_size) def ensure_bytes(buf) -> bytes: diff --git a/numcodecs/ndarray_like.py b/numcodecs/ndarray_like.py index 15773998..2ff00772 100644 --- a/numcodecs/ndarray_like.py +++ b/numcodecs/ndarray_like.py @@ -1,8 +1,5 @@ import sys -from collections import defaultdict -from typing import Any, Callable, DefaultDict, Dict, Mapping, Optional, Tuple - -import numpy as np +from typing import Any, Optional, Tuple if sys.version_info >= (3, 8): from typing import Protocol, runtime_checkable @@ -54,26 +51,6 @@ def view(self, dtype: DType = ...) -> "NDArrayLike": ... 
-ConvertFunc = Callable[[NDArrayLike], NDArrayLike] - -_ndarray_like_registry: DefaultDict[type, Dict[str, ConvertFunc]] = defaultdict(dict) - - -def register_ndarray_like(cls, convert_dict: Mapping[str, ConvertFunc]) -> None: - _ndarray_like_registry[cls].update(convert_dict) - - -def ensure_memtype(ary: NDArrayLike, memtype=Optional[str]) -> NDArrayLike: - if memtype is None: - return ary - return _ndarray_like_registry[ary.__class__][memtype](ary) - - -register_ndarray_like(np.ndarray, {"cpu": lambda x: x}) - -try: - import cupy -except ImportError: - pass -else: - register_ndarray_like(cupy.ndarray, {"cpu": cupy.asnumpy}) +def is_ndarray_like(obj: object) -> bool: + """Return True when `obj` is ndarray-like""" + return isinstance(obj, NDArrayLike) From db9a3b0920756b73e405a66017e00315f9273e92 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 9 Feb 2022 17:04:27 +0100 Subject: [PATCH 4/6] ensure_ndarray and ensure_contiguous_ndarray to force numpy arrays --- numcodecs/compat.py | 91 ++++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 25 deletions(-) diff --git a/numcodecs/compat.py b/numcodecs/compat.py index c631f58f..7ab38337 100644 --- a/numcodecs/compat.py +++ b/numcodecs/compat.py @@ -10,26 +10,26 @@ from .ndarray_like import NDArrayLike, is_ndarray_like -def ensure_ndarray(buf) -> NDArrayLike: - """Convenience function to coerce `buf` to a numpy array, if it is not already a - numpy array. +def ensure_ndarray_like(buf) -> NDArrayLike: + """Convenience function to coerce `buf` to ndarray-like array. Parameters ---------- - buf : array-like or bytes-like - A numpy array or any object exporting a buffer interface. + buf : ndarray-like, array-like, or bytes-like + A numpy array like object such as numpy.ndarray, cupy.ndarray, or + any object exporting a buffer interface. Returns ------- - arr : ndarray - A numpy array, sharing memory with `buf`. + arr : NDArrayLike + A ndarray-like, sharing memory with `buf`. 
Notes ----- This function will not create a copy under any circumstances, it is guaranteed to return a view on memory exported by `buf`. - """ + if not is_ndarray_like(buf): if isinstance(buf, array.array) and buf.typecode in "cu": # Guard condition, do not support array.array with unicode type, this is @@ -40,46 +40,61 @@ def ensure_ndarray(buf) -> NDArrayLike: # N.B., first take a memoryview to make sure that we subsequently create a # numpy array from a memory buffer with no copy mem = memoryview(buf) - # instantiate array from memoryview, ensures no copy buf = np.array(mem, copy=False) - return buf -def ensure_ndarray_like(buf) -> NDArrayLike: - return ensure_ndarray(buf) +def ensure_ndarray(buf) -> np.ndarray: + """Convenience function to coerce `buf` to a numpy array, if it is not already a + numpy array. + Parameters + ---------- + buf : array-like or bytes-like + A numpy array or any object exporting a buffer interface. -def ensure_contiguous_ndarray(buf, max_buffer_size=None) -> NDArrayLike: - """Convenience function to coerce `buf` to a numpy array, if it is not already a - numpy array. Also ensures that the returned value exports fully contiguous memory, + Returns + ------- + arr : ndarray + A numpy array, sharing memory with `buf`. + + Notes + ----- + This function will not create a copy under any circumstances, it is guaranteed to + return a view on memory exported by `buf`. + """ + return np.array(ensure_ndarray_like(buf), copy=False) + + +def ensure_contiguous_ndarray_like(buf, max_buffer_size=None) -> NDArrayLike: + """Convenience function to coerce `buf` to ndarray-like array. + + Also ensures that the returned value exports fully contiguous memory, and supports the new-style buffer interface. If the optional max_buffer_size is provided, raise a ValueError if the number of bytes consumed by the returned array exceeds this value. Parameters ---------- - buf : array-like or bytes-like - A numpy array or any object exporting a buffer interface. 
+ buf : ndarray-like, array-like, or bytes-like + A numpy array like object such as numpy.ndarray, cupy.ndarray, or + any object exporting a buffer interface. max_buffer_size : int If specified, the largest allowable value of arr.nbytes, where arr is the returned array. Returns ------- - arr : ndarray - A numpy array, sharing memory with `buf`. + arr : NDArrayLike + A ndarray-like, sharing memory with `buf`. Notes ----- This function will not create a copy under any circumstances, it is guaranteed to return a view on memory exported by `buf`. - """ - - # ensure input is a numpy array - arr = ensure_ndarray(buf) + arr = ensure_ndarray_like(buf) # check for object arrays, these are just memory pointers, actual memory holding # item data is scattered elsewhere @@ -104,8 +119,34 @@ def ensure_contiguous_ndarray(buf, max_buffer_size=None) -> NDArrayLike: return arr -def ensure_contiguous_ndarray_like(buf, max_buffer_size=None) -> NDArrayLike: - return ensure_contiguous_ndarray(buf, max_buffer_size=max_buffer_size) +def ensure_contiguous_ndarray(buf, max_buffer_size=None) -> np.array: + """Convenience function to coerce `buf` to a numpy array, if it is not already a + numpy array. Also ensures that the returned value exports fully contiguous memory, + and supports the new-style buffer interface. If the optional max_buffer_size is + provided, raise a ValueError if the number of bytes consumed by the returned + array exceeds this value. + + Parameters + ---------- + buf : array-like or bytes-like + A numpy array or any object exporting a buffer interface. + max_buffer_size : int + If specified, the largest allowable value of arr.nbytes, where arr + is the returned array. + + Returns + ------- + arr : ndarray + A numpy array, sharing memory with `buf`. + + Notes + ----- + This function will not create a copy under any circumstances, it is guaranteed to + return a view on memory exported by `buf`. 
+ """ + return ensure_ndarray( + ensure_contiguous_ndarray_like(buf, max_buffer_size=max_buffer_size) + ) def ensure_bytes(buf) -> bytes: From 38d28ff2bb2b5bb15cbac07873d6fbe597f6dd1b Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Mon, 21 Feb 2022 13:49:36 +0100 Subject: [PATCH 5/6] Adding typing-extensions>=3.7.4 to dependencies --- requirements.txt | 1 + requirements_dev.txt | 2 +- requirements_rtfd.txt | 1 + setup.py | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 543bfb43..e309dd0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ cython numpy msgpack pytest +typing-extensions diff --git a/requirements_dev.txt b/requirements_dev.txt index fa8d5d7b..1ffd8632 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,4 +2,4 @@ Cython==0.29.21 msgpack==1.0.2 numpy==1.21.0 zfpy==0.5.5; python_version < '3.9' - +typing-extensions>=3.7.4 diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt index 72bb7fe5..1e20bc1d 100644 --- a/requirements_rtfd.txt +++ b/requirements_rtfd.txt @@ -7,3 +7,4 @@ mock numpy cython zfpy==0.5.5; python_version < '3.9' +typing-extensions diff --git a/setup.py b/setup.py index c8622fd6..541c11ac 100644 --- a/setup.py +++ b/setup.py @@ -338,6 +338,7 @@ def run_setup(with_extensions): ], install_requires=[ 'numpy>=1.7', + 'typing-extensions>=3.7.4', ], extras_require={ 'msgpack': ["msgpack"], From ff671dbf48fd7c1edf1acb07ac23652bb48c6e88 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 23 Feb 2022 09:12:51 +0100 Subject: [PATCH 6/6] Added release note --- docs/release.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index a483ed64..60ef067e 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,11 +6,14 @@ Release notes Unreleased ---------- -.. _release_0.9.1: +* Add support of alternative array classes (other than NumPy arrays) + By :user:`Mads R. B. 
Kristensen <madsbk>`, :issue:`305`. * Add ability to find codecs via entrypoints By :user:`Martin Durant <martindurant>`, :issue:`290`. +.. _release_0.9.1: + 0.9.1 -----