From 228e83fec7a376b77842482ac4dcb5e3443b8018 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Thu, 26 Oct 2017 00:14:33 +0100 Subject: [PATCH 1/2] ellipsis handling; resolves #93 --- zarr/tests/test_core.py | 71 +++++++++++++++++++++++++++++++++++++++-- zarr/tests/test_util.py | 21 ++++++++---- zarr/util.py | 54 +++++++++++++++++++------------ 3 files changed, 116 insertions(+), 30 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index bc45218f9d..27b78eac8f 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -123,6 +123,12 @@ def test_array_1d(self): assert_array_equal(a[:10], z[:10]) assert_array_equal(a[10:20], z[10:20]) assert_array_equal(a[-10:], z[-10:]) + assert_array_equal(a[:10, ...], z[:10, ...]) + assert_array_equal(a[10:20, ...], z[10:20, ...]) + assert_array_equal(a[-10:, ...], z[-10:, ...]) + assert_array_equal(a[..., :10], z[..., :10]) + assert_array_equal(a[..., 10:20], z[..., 10:20]) + assert_array_equal(a[..., -10:], z[..., -10:]) # ...across chunk boundaries... assert_array_equal(a[:110], z[:110]) assert_array_equal(a[190:310], z[190:310]) @@ -135,6 +141,18 @@ def test_array_1d(self): eq(a[42], z[np.int32(42)]) eq(a[42], z[np.uint64(42)]) eq(a[42], z[np.uint32(42)]) + # too many indices + with assert_raises(IndexError): + z[:, :] + with assert_raises(IndexError): + z[0, :] + with assert_raises(IndexError): + z[:, 0] + with assert_raises(IndexError): + z[0, 0] + # only single ellipsis allowed + with assert_raises(IndexError): + z[..., ...] 
# check partial assignment b = np.arange(1e5, 2e5) @@ -194,37 +212,84 @@ def test_array_2d(self): eq(a.nbytes, z.nbytes) eq(50, z.nchunks_initialized) - # check slicing + # check array-like assert_array_equal(a, np.array(z)) + + # check slicing + + # total slice assert_array_equal(a, z[:]) assert_array_equal(a, z[...]) # noinspection PyTypeChecker assert_array_equal(a, z[slice(None)]) + + # slice first dimension assert_array_equal(a[:10], z[:10]) assert_array_equal(a[10:20], z[10:20]) assert_array_equal(a[-10:], z[-10:]) + assert_array_equal(a[:10, :], z[:10, :]) + assert_array_equal(a[10:20, :], z[10:20, :]) + assert_array_equal(a[-10:, :], z[-10:, :]) + assert_array_equal(a[:10, ...], z[:10, ...]) + assert_array_equal(a[10:20, ...], z[10:20, ...]) + assert_array_equal(a[-10:, ...], z[-10:, ...]) + assert_array_equal(a[:10, :, ...], z[:10, :, ...]) + assert_array_equal(a[10:20, :, ...], z[10:20, :, ...]) + assert_array_equal(a[-10:, :, ...], z[-10:, :, ...]) + + # slice second dimension assert_array_equal(a[:, :2], z[:, :2]) assert_array_equal(a[:, 2:4], z[:, 2:4]) assert_array_equal(a[:, -2:], z[:, -2:]) + assert_array_equal(a[..., :2], z[..., :2]) + assert_array_equal(a[..., 2:4], z[..., 2:4]) + assert_array_equal(a[..., -2:], z[..., -2:]) + assert_array_equal(a[:, ..., :2], z[:, ..., :2]) + assert_array_equal(a[:, ..., 2:4], z[:, ..., 2:4]) + assert_array_equal(a[:, ..., -2:], z[:, ..., -2:]) + + # slice both dimensions assert_array_equal(a[:10, :2], z[:10, :2]) assert_array_equal(a[10:20, 2:4], z[10:20, 2:4]) assert_array_equal(a[-10:, -2:], z[-10:, -2:]) - # ...across chunk boundaries... 
+ + # slicing across chunk boundaries assert_array_equal(a[:110], z[:110]) assert_array_equal(a[190:310], z[190:310]) assert_array_equal(a[-110:], z[-110:]) + assert_array_equal(a[:110, :], z[:110, :]) + assert_array_equal(a[190:310, :], z[190:310, :]) + assert_array_equal(a[-110:, :], z[-110:, :]) assert_array_equal(a[:, :3], z[:, :3]) assert_array_equal(a[:, 3:7], z[:, 3:7]) assert_array_equal(a[:, -3:], z[:, -3:]) assert_array_equal(a[:110, :3], z[:110, :3]) assert_array_equal(a[190:310, 3:7], z[190:310, 3:7]) assert_array_equal(a[-110:, -3:], z[-110:, -3:]) - # single item + + # single row/col/item assert_array_equal(a[0], z[0]) assert_array_equal(a[-1], z[-1]) + assert_array_equal(a[:, 0], z[:, 0]) + assert_array_equal(a[:, -1], z[:, -1]) eq(a[0, 0], z[0, 0]) eq(a[-1, -1], z[-1, -1]) + # too many indices + with assert_raises(IndexError): + z[:, :, :] + with assert_raises(IndexError): + z[0, :, :] + with assert_raises(IndexError): + z[:, 0, :] + with assert_raises(IndexError): + z[:, :, 0] + with assert_raises(IndexError): + z[0, 0, 0] + # only single ellipsis allowed + with assert_raises(IndexError): + z[..., ...] 
+ # check partial assignment b = np.arange(10000, 20000).reshape((1000, 10)) z[190:310, 3:7] = b[190:310, 3:7] diff --git a/zarr/tests/test_util.py b/zarr/tests/test_util.py index 5866ab9836..32fbe8e04e 100644 --- a/zarr/tests/test_util.py +++ b/zarr/tests/test_util.py @@ -111,18 +111,17 @@ def test_normalize_array_selection(): eq((slice(0, 100),), normalize_array_selection(slice(None), (100,))) eq((slice(0, 100),), normalize_array_selection(slice(None, 100), (100,))) eq((slice(0, 100),), normalize_array_selection(slice(0, None), (100,))) + eq((slice(0, 100),), normalize_array_selection((slice(None), Ellipsis), (100,))) + eq((slice(0, 100),), normalize_array_selection((Ellipsis, slice(None)), (100,))) # 2D, single item eq((0, 0), normalize_array_selection((0, 0), (100, 100))) eq((99, 1), normalize_array_selection((-1, 1), (100, 100))) # 2D, single col/row - eq((0, slice(0, 100)), normalize_array_selection((0, slice(None)), - (100, 100))) - eq((0, slice(0, 100)), normalize_array_selection((0,), - (100, 100))) - eq((slice(0, 100), 0), normalize_array_selection((slice(None), 0), - (100, 100))) + eq((0, slice(0, 100)), normalize_array_selection((0, slice(None)), (100, 100))) + eq((0, slice(0, 100)), normalize_array_selection((0,), (100, 100))) + eq((slice(0, 100), 0), normalize_array_selection((slice(None), 0), (100, 100))) # 2D slice eq((slice(0, 100), slice(0, 100)), @@ -131,6 +130,16 @@ def test_normalize_array_selection(): normalize_array_selection(slice(None), (100, 100))) eq((slice(0, 100), slice(0, 100)), normalize_array_selection((slice(None), slice(None)), (100, 100))) + eq((slice(0, 100), slice(0, 100)), + normalize_array_selection((Ellipsis, slice(None)), (100, 100))) + eq((slice(0, 100), slice(0, 100)), + normalize_array_selection((slice(None), Ellipsis), (100, 100))) + eq((slice(0, 100), slice(0, 100)), + normalize_array_selection((slice(None), Ellipsis, slice(None)), (100, 100))) + eq((slice(0, 100), slice(0, 100)), + normalize_array_selection((Ellipsis, 
slice(None), slice(None)), (100, 100))) + eq((slice(0, 100), slice(0, 100)), + normalize_array_selection((slice(None), slice(None), Ellipsis), (100, 100))) with assert_raises(TypeError): normalize_array_selection('foo', (100,)) diff --git a/zarr/util.py b/zarr/util.py index c5e093997d..fd39d0a2e4 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -192,29 +192,41 @@ def normalize_array_selection(item, shape): """Convenience function to normalize a selection within an array with the given `shape`.""" - # normalize item - if isinstance(item, numbers.Integral): - item = (int(item),) - elif isinstance(item, slice): + # ensure tuple + if not isinstance(item, tuple): item = (item,) - elif item == Ellipsis: - item = (slice(None),) - - # handle tuple of indices/slices - if isinstance(item, tuple): - # determine start and stop indices for all axes - selection = tuple(normalize_axis_selection(i, l) - for i, l in zip(item, shape)) - - # fill out selection if not completely specified - if len(selection) < len(shape): - selection += tuple(slice(0, l) for l in shape[len(selection):]) - - return selection - - else: - raise TypeError('expected indices or slice, found: %r' % item) + # handle ellipsis + n_ellipsis = sum(1 for i in item if i == Ellipsis) + if n_ellipsis > 1: + raise IndexError("an index can only have a single ellipsis ('...')") + elif n_ellipsis == 1: + idx_ellipsis = item.index(Ellipsis) + n_items_l = idx_ellipsis # items to left of ellipsis + n_items_r = len(item) - (idx_ellipsis + 1) # items to right of ellipsis + n_items = len(item) - 1 # all non-ellipsis items + if n_items >= len(shape): + # ellipsis does nothing, just remove it + item = tuple(i for i in item if i != Ellipsis) + else: + # replace ellipsis with slices + new_item = item[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) + if n_items_r: + new_item += item[-n_items_r:] + item = new_item + + # check dimensionality + if len(item) > len(shape): + raise IndexError('too many indices for array') + + # 
determine start and stop indices for all axes + selection = tuple(normalize_axis_selection(i, l) for i, l in zip(item, shape)) + + # fill out selection if not completely specified + if len(selection) < len(shape): + selection += tuple(slice(0, l) for l in shape[len(selection):]) + + return selection def get_chunk_range(selection, chunks): From b9cd024fbe465f71fa60d446e2788d8bee80c084 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Thu, 26 Oct 2017 22:02:25 +0100 Subject: [PATCH 2/2] minor conciseness --- zarr/util.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/zarr/util.py b/zarr/util.py index fd39d0a2e4..1500ca95a7 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -201,15 +201,14 @@ def normalize_array_selection(item, shape): if n_ellipsis > 1: raise IndexError("an index can only have a single ellipsis ('...')") elif n_ellipsis == 1: - idx_ellipsis = item.index(Ellipsis) - n_items_l = idx_ellipsis # items to left of ellipsis - n_items_r = len(item) - (idx_ellipsis + 1) # items to right of ellipsis + n_items_l = item.index(Ellipsis) # items to left of ellipsis + n_items_r = len(item) - (n_items_l + 1) # items to right of ellipsis n_items = len(item) - 1 # all non-ellipsis items if n_items >= len(shape): # ellipsis does nothing, just remove it item = tuple(i for i in item if i != Ellipsis) else: - # replace ellipsis with slices + # replace ellipsis with as many slices as are needed for number of dims new_item = item[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) if n_items_r: new_item += item[-n_items_r:]