Skip to content

Commit 8495469

Browse files
jeromekelleher authored and alimanfoo committed
Chunkwise iteration over arrays. (#399)
* Chunkwise iteration over arrays. Closes #398.
* Fixed lint error from new flake8 version.
1 parent 43f7fae commit 8495469

File tree

5 files changed

+57
-6
lines changed

5 files changed

+57
-6
lines changed

docs/release.rst

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,18 @@ Enhancements
2323
SQLite database to be used as the backing store for an array or group.
2424
By :user:`John Kirkham <jakirkham>`, :issue:`368`, :issue:`365`.
2525

26+
* Efficient iteration over arrays by decompressing chunkwise.
27+
By :user:`Jerome Kelleher <jeromekelleher>`, :issue:`398`.
28+
2629
Bug fixes
2730
~~~~~~~~~
2831

29-
* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to
30-
ensure that writes are atomic and there are no race conditions where a chunk might appear
31-
transiently missing during a write operation. By :user:`sbalmer <sbalmer>`, :issue:`327`,
32+
* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to
33+
ensure that writes are atomic and there are no race conditions where a chunk might appear
34+
transiently missing during a write operation. By :user:`sbalmer <sbalmer>`, :issue:`327`,
3235
:issue:`263`.
3336

34-
* The required version of the `numcodecs <http://numcodecs.rtfd.io>`_ package has been upgraded
37+
* The required version of the `numcodecs <http://numcodecs.rtfd.io>`_ package has been upgraded
3538
to 0.6.2, which has enabled some code simplification and fixes a failing test involving
3639
msgpack encoding. By :user:`John Kirkham <jakirkham>`, :issue:`361`, :issue:`360`, :issue:`352`,
3740
:issue:`355`, :issue:`324`.

zarr/compat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
text_type = unicode
1313
binary_type = str
1414
reduce = reduce
15+
from itertools import izip_longest as zip_longest
1516

1617
class PermissionError(Exception):
1718
pass
@@ -27,6 +28,7 @@ def OrderedDict_move_to_end(od, key):
2728
text_type = str
2829
binary_type = bytes
2930
from functools import reduce
31+
from itertools import zip_longest
3032
PermissionError = PermissionError
3133

3234
def OrderedDict_move_to_end(od, key):

zarr/core.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,18 @@ def __array__(self, *args):
424424
a = a.astype(args[0])
425425
return a
426426

427+
def __iter__(self):
    """Iterate over the array along its first dimension.

    Yields ``self[0]``, ``self[1]``, ... up to ``self[self.shape[0] - 1]``.
    Items are served from slabs of ``self.chunks[0]`` consecutive entries,
    each obtained with a single slice read, to avoid repeatedly
    decompressing the same chunks.

    Raises
    ------
    TypeError
        If the array is zero-dimensional (same error as numpy).
    """
    if len(self.shape) == 0:
        # Same error as numpy
        raise TypeError("iteration over a 0-d array")

    n_items = self.shape[0]
    slab_len = self.chunks[0]

    # Step through the first dimension one chunk-length at a time; the
    # final slab may be shorter than ``slab_len``.
    for start in range(0, n_items, slab_len):
        stop = start + slab_len
        slab = self[start:stop]
        for offset in range(min(stop, n_items) - start):
            yield slab[offset]
438+
427439
def __len__(self):
428440
if self.shape:
429441
return self.shape[0]

zarr/storage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1449,7 +1449,7 @@ def flush(self):
14491449
if self.flag[0] != 'r':
14501450
with self.write_mutex:
14511451
if hasattr(self.db, 'sync'):
1452-
self.db.sync()
1452+
self.db.sync()
14531453
else:
14541454
# fall-back, close and re-open, needed for ndbm
14551455
flag = self.flag

zarr/tests/test_core.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
LRUStoreCache)
2020
from zarr.core import Array
2121
from zarr.errors import PermissionError
22-
from zarr.compat import PY2, text_type, binary_type
22+
from zarr.compat import PY2, text_type, binary_type, zip_longest
2323
from zarr.util import buffer_size
2424
from numcodecs import (Delta, FixedScaleOffset, Zlib, Blosc, BZ2, MsgPack, Pickle,
2525
Categorize, JSON, VLenUTF8, VLenBytes, VLenArray)
@@ -1155,6 +1155,40 @@ def test_object_codec_warnings(self):
11551155
# provide object_codec, but not object dtype
11561156
self.create_array(shape=10, chunks=5, dtype='i4', object_codec=JSON())
11571157

1158+
def test_zero_d_iter(self):
    """Iterating a zero-dimensional array raises TypeError, as numpy does."""
    scalar = np.array(1, dtype=int)
    zarray = self.create_array(shape=scalar.shape, dtype=int)
    zarray[...] = scalar

    # Check the numpy reference first, then the zarr array built from it.
    for zero_d in (scalar, zarray):
        with pytest.raises(TypeError):
            # noinspection PyStatementEffect
            list(zero_d)
1168+
1169+
def test_iter(self):
    """Iterating an array yields the same items as iterating its numpy data.

    Each entry of ``params`` is a ``(shape, chunks)`` pair. The cases cover
    chunk lengths smaller than, equal to and larger than the first
    dimension, zero-length dimensions, and 1-, 2- and 3-dimensional arrays.
    """
    params = (
        ((1,), (1,)),
        ((2,), (1,)),
        ((1,), (2,)),
        ((3,), (3,)),
        ((1000,), (100,)),
        ((100,), (1000,)),
        ((1, 100), (1, 1)),
        ((1, 0), (1, 1)),
        ((0, 1), (1, 1)),
        ((0, 1), (2, 1)),
        ((100, 1), (3, 1)),
        ((100, 100), (10, 10)),
        ((10, 10, 10), (3, 3, 3)),
    )
    for shape, chunks in params:
        z = self.create_array(shape=shape, chunks=chunks, dtype=int)
        # Use np.prod: the np.product alias is deprecated and no longer
        # exists in NumPy 2.0.
        a = np.arange(np.prod(shape)).reshape(shape)
        z[:] = a
        # zip_longest (rather than zip) keeps going until the longer
        # iterable is exhausted, so a length mismatch between the two is
        # not silently truncated away.
        for expect, actual in zip_longest(a, z):
            assert_array_equal(expect, actual)
11581192

11591193
class TestArrayWithPath(TestArray):
11601194

0 commit comments

Comments
 (0)