From d11cb576fd5a167c909f7d6e05717cbc9fea1401 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Tue, 5 Feb 2019 19:59:59 +0000 Subject: [PATCH 1/2] Chunkwise iteration over arrays. Closes #398. --- docs/release.rst | 11 +++++++---- zarr/compat.py | 2 ++ zarr/core.py | 12 ++++++++++++ zarr/tests/test_core.py | 36 +++++++++++++++++++++++++++++++++++- 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index f0d5a559ab..d996240afa 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -23,15 +23,18 @@ Enhancements SQLite database to be used as the backing store for an array or group. By :user:`John Kirkham `, :issue:`368`, :issue:`365`. +* Efficient iteration over arrays by decompressing chunkwise. + By :user:`Jerome Kelleher `, :issue:`398`. + Bug fixes ~~~~~~~~~ -* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to - ensure that writes are atomic and there are no race conditions where a chunk might appear - transiently missing during a write operation. By :user:`sbalmer `, :issue:`327`, +* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to + ensure that writes are atomic and there are no race conditions where a chunk might appear + transiently missing during a write operation. By :user:`sbalmer `, :issue:`327`, :issue:`263`. -* The required version of the `numcodecs `_ package has been upgraded +* The required version of the `numcodecs `_ package has been upgraded to 0.6.2, which has enabled some code simplification and fixes a failing test involving msgpack encoding. By :user:`John Kirkham `, :issue:`361`, :issue:`360`, :issue:`352`, :issue:`355`, :issue:`324`. diff --git a/zarr/compat.py b/zarr/compat.py index 117a8edf59..91a75548e6 100644 --- a/zarr/compat.py +++ b/zarr/compat.py @@ -12,6 +12,7 @@ text_type = unicode binary_type = str reduce = reduce + from itertools import izip_longest as zip_longest class PermissionError(Exception): pass @@ -27,6 +28,7 @@ def OrderedDict_move_to_end(od, key): text_type = str binary_type = bytes from functools import reduce + from itertools import zip_longest PermissionError = PermissionError def OrderedDict_move_to_end(od, key): diff --git a/zarr/core.py b/zarr/core.py index cc04953a7c..0838117b89 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -424,6 +424,18 @@ def __array__(self, *args): a = a.astype(args[0]) return a + def __iter__(self): + if len(self.shape) == 0: + # Same error as numpy + raise TypeError("iteration over a 0-d array") + # Avoid repeatedly decompressing chunks by iterating over the chunks + # in the first dimension. + chunk_size = self.chunks[0] + for j in range(self.shape[0]): + if j % chunk_size == 0: + chunk = self[j: j + chunk_size] + yield chunk[j % chunk_size] + def __len__(self): if self.shape: return self.shape[0] diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index b2b6bb011e..1c7d526c0c 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -19,7 +19,7 @@ LRUStoreCache) from zarr.core import Array from zarr.errors import PermissionError -from zarr.compat import PY2, text_type, binary_type +from zarr.compat import PY2, text_type, binary_type, zip_longest from zarr.util import buffer_size from numcodecs import (Delta, FixedScaleOffset, Zlib, Blosc, BZ2, MsgPack, Pickle, Categorize, JSON, VLenUTF8, VLenBytes, VLenArray) @@ -1155,6 +1155,40 @@ def test_object_codec_warnings(self): # provide object_codec, but not object dtype self.create_array(shape=10, chunks=5, dtype='i4', object_codec=JSON()) + def test_zero_d_iter(self): + a = np.array(1, dtype=int) + z = self.create_array(shape=a.shape, dtype=int) + z[...] = a + with pytest.raises(TypeError): + # noinspection PyStatementEffect + list(a) + with pytest.raises(TypeError): + # noinspection PyStatementEffect + list(z) + + def test_iter(self): + params = ( + ((1,), (1,)), + ((2,), (1,)), + ((1,), (2,)), + ((3,), (3,)), + ((1000,), (100,)), + ((100,), (1000,)), + ((1, 100), (1, 1)), + ((1, 0), (1, 1)), + ((0, 1), (1, 1)), + ((0, 1), (2, 1)), + ((100, 1), (3, 1)), + ((100, 100), (10, 10)), + ((10, 10, 10), (3, 3, 3)), + ) + for shape, chunks in params: + z = self.create_array(shape=shape, chunks=chunks, dtype=int) + a = np.arange(np.product(shape)).reshape(shape) + z[:] = a + for expect, actual in zip_longest(a, z): + assert_array_equal(expect, actual) + class TestArrayWithPath(TestArray): From 1fa04e7b80caf78a136423b167696effd64f8bdd Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 6 Feb 2019 09:13:33 +0000 Subject: [PATCH 2/2] Fixed lint error from new flake8 version. --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 2a07d9aa38..75e4d7d04d 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1449,7 +1449,7 @@ def flush(self): if self.flag[0] != 'r': with self.write_mutex: if hasattr(self.db, 'sync'): - self.db.sync() + self.db.sync() else: # fall-back, close and re-open, needed for ndbm flag = self.flag