zarr-developers · alimanfoo · Feb 6, 2019 · Feb 5, 2019 · Feb 6, 2019
diff --git a/docs/release.rst b/docs/release.rst
@@ -23,15 +23,18 @@ Enhancements
   SQLite database to be used as the backing store for an array or group.
   By :user:`John Kirkham <jakirkham>`, :issue:`368`, :issue:`365`.
 
+* Efficient iteration over arrays: each chunk is decompressed once rather than once per item.
+  By :user:`Jerome Kelleher <jeromekelleher>`, :issue:`398`.
+
 Bug fixes
 ~~~~~~~~~
 
-* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to 
-  ensure that writes are atomic and there are no race conditions where a chunk might appear 
-  transiently missing during a write operation. By :user:`sbalmer <sbalmer>`, :issue:`327`, 
+* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to
+  ensure that writes are atomic and there are no race conditions where a chunk might appear
+  transiently missing during a write operation. By :user:`sbalmer <sbalmer>`, :issue:`327`,
   :issue:`263`.
 
-* The required version of the `numcodecs <http://numcodecs.rtfd.io>`_ package has been upgraded 
+* The required version of the `numcodecs <http://numcodecs.rtfd.io>`_ package has been upgraded
   to 0.6.2, which has enabled some code simplification and fixes a failing test involving
   msgpack encoding. By :user:`John Kirkham <jakirkham>`, :issue:`361`, :issue:`360`, :issue:`352`,
   :issue:`355`, :issue:`324`.

diff --git a/zarr/compat.py b/zarr/compat.py
@@ -12,6 +12,7 @@
     text_type = unicode
     binary_type = str
     reduce = reduce
+    from itertools import izip_longest as zip_longest
 
     class PermissionError(Exception):
         pass
@@ -27,6 +28,7 @@ def OrderedDict_move_to_end(od, key):
     text_type = str
     binary_type = bytes
     from functools import reduce
+    from itertools import zip_longest
     PermissionError = PermissionError
 
     def OrderedDict_move_to_end(od, key):

diff --git a/zarr/core.py b/zarr/core.py
@@ -424,6 +424,18 @@ def __array__(self, *args):
             a = a.astype(args[0])
         return a
 
+    def __iter__(self):
+        if not self.shape:
+            # numpy raises the same error for 0-d arrays
+            raise TypeError("iteration over a 0-d array")
+        # Load one chunk-sized slab at a time so no chunk is decompressed twice.
+        step, n_items = self.chunks[0], self.shape[0]
+        for idx in range(n_items):
+            offset = idx % step
+            if offset == 0:
+                slab = self[idx: idx + step]
+            yield slab[offset]
+
     def __len__(self):
         if self.shape:
             return self.shape[0]

diff --git a/zarr/storage.py b/zarr/storage.py
@@ -1449,7 +1449,7 @@ def flush(self):
         if self.flag[0] != 'r':
             with self.write_mutex:
                 if hasattr(self.db, 'sync'):
-                        self.db.sync()
+                    self.db.sync()
                 else:
                     # fall-back, close and re-open, needed for ndbm
                     flag = self.flag

diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
@@ -19,7 +19,7 @@
                           LRUStoreCache)
 from zarr.core import Array
 from zarr.errors import PermissionError
-from zarr.compat import PY2, text_type, binary_type
+from zarr.compat import PY2, text_type, binary_type, zip_longest
 from zarr.util import buffer_size
 from numcodecs import (Delta, FixedScaleOffset, Zlib, Blosc, BZ2, MsgPack, Pickle,
                        Categorize, JSON, VLenUTF8, VLenBytes, VLenArray)
@@ -1155,6 +1155,40 @@ def test_object_codec_warnings(self):
             # provide object_codec, but not object dtype
             self.create_array(shape=10, chunks=5, dtype='i4', object_codec=JSON())
 
+    def test_zero_d_iter(self):
+        """Iterating a 0-d array raises TypeError, for numpy and zarr alike."""
+        scalar = np.array(1, dtype=int)
+        z = self.create_array(shape=scalar.shape, dtype=int)
+        z[...] = scalar
+        # numpy is checked first to pin down the reference behaviour that the
+        # zarr array is expected to mirror.
+        for arr in (scalar, z):
+            with pytest.raises(TypeError):
+                list(arr)
+
+    def test_iter(self):
+        """Check that iterating a zarr array matches iterating numpy."""
+        # Each entry is a (shape, chunks) pair.
+        params = (
+            # 1-d: chunks smaller than, equal to and larger than the array
+            ((1,), (1,)), ((2,), (1,)), ((1,), (2,)), ((3,), (3,)),
+            ((1000,), (100,)), ((100,), (1000,)),
+            # 2-d, including zero-length dimensions
+            ((1, 100), (1, 1)), ((1, 0), (1, 1)),
+            ((0, 1), (1, 1)), ((0, 1), (2, 1)),
+            ((100, 1), (3, 1)), ((100, 100), (10, 10)),
+            # 3-d
+            ((10, 10, 10), (3, 3, 3)),
+        )
+        for shape, chunks in params:
+            z = self.create_array(shape=shape, chunks=chunks, dtype=int)
+            # np.prod rather than its alias np.product, which newer numpy drops
+            a = np.arange(np.prod(shape)).reshape(shape)
+            z[:] = a
+            # zip_longest pads with None, so a length mismatch shows up as a
+            for expect, actual in zip_longest(a, z):  # failed comparison
+                assert_array_equal(expect, actual)
+
 
 class TestArrayWithPath(TestArray):