From a1e8b54799ab144ae962f70219a87282d05653a8 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 11 Jun 2024 19:36:21 +0200 Subject: [PATCH 01/18] fix typing --- src/zarr/indexing.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 98130fe0cd..f5516d7606 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -30,32 +30,22 @@ from zarr.buffer import NDArrayLike from zarr.chunk_grids import ChunkGrid -BasicSelector = int | slice | EllipsisType -BasicSelectorTuple = tuple[BasicSelector, ...] -BasicSelection = BasicSelector | BasicSelectorTuple -BasicSelectionNormalized = tuple[int | slice, ...] -CoordinateSelector = list[int] | npt.NDArray[np.intp] -CoordinateSelection = CoordinateSelector | tuple[CoordinateSelector, ...] -CoordinateSelectionNormalized = tuple[npt.NDArray[np.intp], ...] -BlockSelector = int | slice -BlockSelection = BlockSelector | tuple[BlockSelector, ...] -BlockSelectionNormalized = tuple[BlockSelector, ...] +IntOrSlice = int | slice +IntOrSliceOrEllipsis = IntOrSlice | EllipsisType +IntSequence = list[int] | npt.NDArray[np.intp] +IntOrSliceOrArray = IntOrSlice | npt.NDArray[np.intp] | npt.NDArray[np.bool_] + +BasicSelection = IntOrSliceOrEllipsis | tuple[IntOrSliceOrEllipsis, ...] +CoordinateSelection = IntSequence | tuple[IntSequence, ...] +BlockSelection = IntOrSlice | tuple[IntOrSlice, ...] MaskSelection = npt.NDArray[np.bool_] -OrthogonalSelector = int | slice | npt.NDArray[np.intp] | npt.NDArray[np.bool_] -OrthogonalSelection = OrthogonalSelector | tuple[OrthogonalSelector, ...] -OrthogonalSelectionNormalized = tuple[OrthogonalSelector, ...] - +OrthogonalSelection = IntOrSliceOrArray | tuple[IntOrSliceOrArray, ...] 
Selection = ( BasicSelection | CoordinateSelection | BlockSelection | MaskSelection | OrthogonalSelection ) -SelectionNormalized = ( - BasicSelectionNormalized - | CoordinateSelectionNormalized - | BlockSelectionNormalized - | MaskSelection - | OrthogonalSelectionNormalized -) -Selector = int | slice | npt.NDArray[np.intp] | npt.NDArray[np.bool_] +CoordinateSelectionNormalized = tuple[npt.NDArray[np.intp], ...] +SelectionNormalized = tuple[IntOrSliceOrArray, ...] | CoordinateSelectionNormalized | MaskSelection +Selector = IntOrSlice | npt.NDArray[np.intp] | npt.NDArray[np.bool_] SelectionWithFields = Selection | str | Sequence[str] SelectorTuple = tuple[Selector, ...] | npt.NDArray[np.intp] | slice Fields = str | list[str] | tuple[str, ...] @@ -1206,8 +1196,8 @@ def pop_fields(selection: SelectionWithFields) -> tuple[Fields | None, Selection return fields, selection -def make_slice_selection(selection: Any) -> list[int | slice]: - ls: list[int | slice] = [] +def make_slice_selection(selection: Any) -> list[IntOrSlice]: + ls: list[IntOrSlice] = [] for dim_selection in selection: if is_integer(dim_selection): ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) From 448f8486f5b9b5b3a61f7809a23ace029f5cf1a7 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 11 Jun 2024 21:16:45 +0200 Subject: [PATCH 02/18] add docstring for get_block_selection --- src/zarr/array.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++ src/zarr/buffer.py | 2 +- 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 9ac1ce41ec..5249bf6587 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -856,6 +856,86 @@ def get_block_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> NDArrayLike: + """Retrieve a selection of individual blocks, by providing the chunk indices + (coordinates) for each selected block. 
+ + Parameters + ---------- + selection : int or slice or tuple of int or slice + An integer (coordinate) or slice for each dimension of the array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + NDArrayLike + A NumPy-like array containing the data for the requested block selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(3, 3), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + Retrieve items by specifying their block coordinates:: + + >>> z.get_block_selection((1, slice(None))) + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + Which is equivalent to:: + + >>> z[3:6, :] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + For convenience, the block selection functionality is also available via the + `blocks` property, e.g.:: + + >>> z.blocks[1] + array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + + Notes + ----- + Block indexing is a convenience indexing method to work on individual chunks + with chunk index slicing. It has the same concept as Dask's `Array.blocks` + indexing. + + Slices are supported. However, only with a step size of one. 
+ + Block index arrays may be multidimensional to index multidimensional arrays. + For example:: + + >>> z.blocks[0, 1:3] + array([[ 3, 4, 5, 6, 7, 8], + [13, 14, 15, 16, 17, 18], + [23, 24, 25, 26, 27, 28]]) + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 1a34d9f290..81de5a6675 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -232,7 +232,7 @@ def __add__(self, other: Buffer) -> Self: class NDBuffer: - """A n-dimensional memory block + """An n-dimensional memory block We use NDBuffer throughout Zarr to represent a n-dimensional memory block. From 439aacced7dd138745647b84af167d63e310fd04 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 12 Jun 2024 11:53:35 +0200 Subject: [PATCH 03/18] add docstring for get_basic_selection and get_coordinate_selection --- src/zarr/array.py | 181 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index 5249bf6587..5f629d1751 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -727,6 +727,119 @@ def get_basic_selection( prototype: BufferPrototype = default_buffer_prototype, fields: Fields | None = None, ) -> NDArrayLike: + """Retrieve data for an item or region of the array. + + Parameters + ---------- + selection : tuple + A tuple specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. 
+ fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + out : NDArrayLike + A NumPy-like array containing the data for the requested region. + + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(0, 100, dtype="uint16") + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(3,), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve a single item:: + + >>> z.get_basic_selection(5) + 5 + + Retrieve a region via slicing:: + + >>> z.get_basic_selection(slice(5)) + array([0, 1, 2, 3, 4]) + >>> z.get_basic_selection(slice(-5, None)) + array([95, 96, 97, 98, 99]) + >>> z.get_basic_selection(slice(5, 10)) + array([5, 6, 7, 8, 9]) + >>> z.get_basic_selection(slice(5, 10, 2)) + array([5, 7, 9]) + >>> z.get_basic_selection(slice(None, None, 2)) + array([ 0, 2, 4, ..., 94, 96, 98]) + + Setup a 3-dimensional array:: + + >>> data = np.arange(1000).reshape(10, 10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(5, 5, 5), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve an item:: + + >>> z.get_basic_selection((1, 2, 3)) + 123 + + Retrieve a region via slicing and Ellipsis:: + + >>> z.get_basic_selection((slice(1, 3), slice(1, 3), 0)) + array([[110, 120], + [210, 220]]) + >>> z.get_basic_selection((0, slice(1, 3), slice(None))) + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) + >>> z.get_basic_selection((..., 2)) + array([[ 2 12 22 32 42 52 62 72 82 92] + [102 112 122 132 142 152 162 172 182 192] + ... 
+ [802 812 822 832 842 852 862 872 882 892] + [902 912 922 932 942 952 962 972 982 992]] + + TODO: check this example + For arrays with a structured dtype, specific fields can be retrieved, e.g.:: + + >>> a = np.array([(b'aaa', 1, 4.2), + ... (b'bbb', 2, 8.4), + ... (b'ccc', 3, 12.6)], + ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) + >>> z = zarr.v2.array(a) + >>> z.get_basic_selection(slice(2), fields='foo') + array([b'aaa', b'bbb'], + dtype='|S3') + + Notes + ----- + Slices with step > 1 are supported, but slices with negative step are not. + + This method provides the implementation for accessing data via the + square bracket notation (__getitem__). See :func:`__getitem__` for examples + using the alternative notation. + + See Also + -------- + set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ + if self.shape == (): raise NotImplementedError else: @@ -812,6 +925,73 @@ def get_coordinate_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> NDArrayLike: + """Retrieve a selection of individual items, by providing the indices + (coordinates) for each selected item. + + Parameters + ---------- + selection : tuple + An integer (coordinate) array for each dimension of the array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + out : NDArrayLike + A NumPy-like array containing the data for the requested coordinate selection. 
+ + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(3, 3), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve items by specifying their coordinates:: + + >>> z.get_coordinate_selection(([1, 4], [1, 4])) + array([11, 44]) + + For convenience, the coordinate selection functionality is also available via the + `vindex` property, e.g.:: + + >>> z.vindex[[1, 4], [1, 4]] + array([11, 44]) + + Notes + ----- + Coordinate indexing is also known as point selection, and is a form of vectorized + or inner indexing. + + Slices are not supported. Coordinate arrays must be provided for all dimensions + of the array. + + Coordinate arrays may be multidimensional, in which case the output array will + also be multidimensional. Coordinate arrays are broadcast against each other + before being applied. The shape of the output will be the same as the shape of + each coordinate array after broadcasting. 
+ + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) out_array = sync( self._async_array._get_selection( @@ -890,6 +1070,7 @@ def get_block_selection( >>> dtype=data.dtype, >>> ) >>> z[:] = data + Retrieve items by specifying their block coordinates:: >>> z.get_block_selection((1, slice(None))) From c501550eebec3ecab212181133649fb9f0ae637e Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 12 Jun 2024 12:24:56 +0200 Subject: [PATCH 04/18] add note for get_basic_selection with structured dtype --- src/zarr/array.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 5f629d1751..1a00d3538b 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -60,7 +60,7 @@ pop_fields, ) from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata -from zarr.store import StoreLike, StorePath, make_store_path +from zarr.store import StoreLike, StorePath, make_store_path, MemoryStore from zarr.sync import sync @@ -811,22 +811,13 @@ def get_basic_selection( [802 812 822 832 842 852 862 872 882 892] [902 912 922 932 942 952 962 972 982 992]] - TODO: check this example - For arrays with a structured dtype, specific fields can be retrieved, e.g.:: - - >>> a = np.array([(b'aaa', 1, 4.2), - ... (b'bbb', 2, 8.4), - ... (b'ccc', 3, 12.6)], - ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.v2.array(a) - >>> z.get_basic_selection(slice(2), fields='foo') - array([b'aaa', b'bbb'], - dtype='|S3') - Notes ----- Slices with step > 1 are supported, but slices with negative step are not. 
+ For arrays with a structured dtype, see zarr v2 for examples of how to use + the `fields` parameter. + This method provides the implementation for accessing data via the square bracket notation (__getitem__). See :func:`__getitem__` for examples using the alternative notation. From 44b840a2d0f7259a78ed12f7212815fe9f63aab4 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 12 Jun 2024 17:10:35 +0200 Subject: [PATCH 05/18] remove common.Selection and replace by indexing.Selection --- src/zarr/array.py | 6 +++--- src/zarr/common.py | 2 -- src/zarr/indexing.py | 4 ++-- tests/v3/test_codecs.py | 3 +-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 1a00d3538b..c07b94f6f7 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -30,7 +30,6 @@ ZARRAY_JSON, ZATTRS_JSON, ChunkCoords, - Selection, ZarrFormat, concurrent_map, product, @@ -51,6 +50,7 @@ OIndex, OrthogonalIndexer, OrthogonalSelection, + Selection, VIndex, check_fields, check_no_multi_fields, @@ -460,7 +460,7 @@ async def _get_selection( return out_buffer.as_ndarray_like() async def getitem( - self, selection: Selection, *, prototype: BufferPrototype = default_buffer_prototype + self, selection: BasicSelection, *, prototype: BufferPrototype = default_buffer_prototype ) -> NDArrayLike: indexer = BasicIndexer( selection, @@ -520,7 +520,7 @@ async def _set_selection( async def setitem( self, - selection: Selection, + selection: BasicSelection, value: NDArrayLike, prototype: BufferPrototype = default_buffer_prototype, ) -> None: diff --git a/src/zarr/common.py b/src/zarr/common.py index 9349f9f018..6bff189e86 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -30,8 +30,6 @@ BytesLike = bytes | bytearray | memoryview ChunkCoords = tuple[int, ...] ChunkCoordsLike = Iterable[int] -SliceSelection = tuple[slice, ...] 
-Selection = slice | SliceSelection ZarrFormat = Literal[2, 3] JSON = None | str | int | float | Enum | dict[str, "JSON"] | list["JSON"] | tuple["JSON", ...] MemoryOrder = Literal["C", "F"] diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index f5516d7606..069dad5804 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -1196,8 +1196,8 @@ def pop_fields(selection: SelectionWithFields) -> tuple[Fields | None, Selection return fields, selection -def make_slice_selection(selection: Any) -> list[IntOrSlice]: - ls: list[IntOrSlice] = [] +def make_slice_selection(selection: Any) -> list[slice]: + ls: list[slice] = [] for dim_selection in selection: if is_integer(dim_selection): ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 514294c4b0..1ce03ed0b0 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -21,9 +21,8 @@ TransposeCodec, ZstdCodec, ) -from zarr.common import Selection from zarr.config import config -from zarr.indexing import morton_order_iter +from zarr.indexing import morton_order_iter, Selection from zarr.store import MemoryStore, StorePath from zarr.testing.utils import assert_bytes_equal From 64c1a6e7bc583378cfe243c69fefd8bafa642632 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 12 Jun 2024 17:12:28 +0200 Subject: [PATCH 06/18] add docstring for set_block_selection --- src/zarr/array.py | 81 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index c07b94f6f7..aa83b141c7 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1123,6 +1123,85 @@ def set_block_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> None: + """Modify a selection of individual blocks, by providing the chunk indices + (coordinates) for each block to be modified. 
+ + Parameters + ---------- + selection : int or slice or tuple of int or slice + An integer (coordinate) or slice for each dimension of the array. + value : NDArrayLike + A NumPy-like array containing the data to be stored in the block selection. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. + + Examples + -------- + Set up a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.zeros(0, 36, dtype="uint16").reshape((6, 6)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(2, 2), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set data for a selection of items:: + + >>> z.set_block_selection((1, 0), 1) + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]]) + + For convenience, this functionality is also available via the `blocks` property. + E.g.:: + + >>> z.blocks[2, 1] = 4 + >>> z[...] + array([[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 4, 4, 0, 0], + [0, 0, 4, 4, 0, 0]]) + + >>> z.blocks[:, 2] = 7 + >>> z[...] + array([[0, 0, 0, 0, 7, 7], + [0, 0, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [1, 1, 0, 0, 7, 7], + [0, 0, 4, 4, 7, 7], + [0, 0, 4, 4, 7, 7]]) + + Notes + ----- + Block indexing is a convenience indexing method to work on individual chunks + with chunk index slicing. It has the same concept as Dask's `Array.blocks` + indexing. + + Slices are supported. However, only with a step size of one. 
+ + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @@ -1136,6 +1215,8 @@ def oindex(self) -> OIndex: @property def blocks(self) -> BlockIndex: + """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and + :func:`set_block_selection` for documentation and examples.""" return BlockIndex(self) def resize(self, new_shape: ChunkCoords) -> Array: From 5c866c60e1ceae01c449458ee9bcfec9c90ca84a Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 12 Jun 2024 18:26:01 +0200 Subject: [PATCH 07/18] add docstring for __getitem__ and __setitem__ --- src/zarr/array.py | 238 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index aa83b141c7..147b1ec79a 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -702,6 +702,148 @@ def read_only(self) -> bool: return self._async_array.read_only def __getitem__(self, selection: Selection) -> NDArrayLike: + """Retrieve data for an item or region of the array. + + Parameters + ---------- + selection : tuple + An integer index or slice or tuple of int/slice objects specifying the + requested item or region for each dimension of the array. + + Returns + ------- + out : NDArrayLike + A NumPy-like array containing the data for the requested region. 
+ + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100, dtype="uint16") + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(10,), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve a single item:: + + >>> z[5] + 5 + + Retrieve a region via slicing:: + + >>> z[:5] + array([0, 1, 2, 3, 4]) + >>> z[-5:] + array([95, 96, 97, 98, 99]) + >>> z[5:10] + array([5, 6, 7, 8, 9]) + >>> z[5:10:2] + array([5, 7, 9]) + >>> z[::2] + array([ 0, 2, 4, ..., 94, 96, 98]) + + Load the entire array into memory:: + + >>> z[...] + array([ 0, 1, 2, ..., 97, 98, 99]) + + Setup a 2-dimensional array:: + + >>> data = np.arange(100, dtype="uint16").reshape(10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(10, 10), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve an item:: + + >>> z[2, 2] + 22 + + Retrieve a region via slicing:: + + >>> z[1:3, 1:3] + array([[11, 12], + [21, 22]]) + >>> z[1:3, :] + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) + >>> z[:, 1:3] + array([[ 1, 2], + [11, 12], + [21, 22], + [31, 32], + [41, 42], + [51, 52], + [61, 62], + [71, 72], + [81, 82], + [91, 92]]) + >>> z[0:5:2, 0:5:2] + array([[ 0, 2, 4], + [20, 22, 24], + [40, 42, 44]]) + >>> z[::2, ::2] + array([[ 0, 2, 4, 6, 8], + [20, 22, 24, 26, 28], + [40, 42, 44, 46, 48], + [60, 62, 64, 66, 68], + [80, 82, 84, 86, 88]]) + + Load the entire array into memory:: + + >>> z[...] 
+ array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], + [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], + [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], + [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], + [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) + + Notes + ----- + Slices with step > 1 are supported, but slices with negative step are not. + + For arrays with a structured dtype, see zarr v2 for examples of how to use + fields + + Currently the implementation for __getitem__ is provided by + :func:`vindex` if the indexing is pure fancy indexing (ie a + broadcast-compatible tuple of integer array indices), or by + :func:`get_basic_selection` otherwise. + + Effectively, this means that the following indexing modes are supported: + + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) + + For specific indexing options including outer indexing, see the + methods listed under See Also. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __setitem__ + + """ fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): return self.vindex[cast(CoordinateSelection | MaskSelection, selection)] @@ -710,7 +852,103 @@ def __getitem__(self, selection: Selection) -> NDArrayLike: else: return self.get_basic_selection(cast(BasicSelection, pure_selection), fields=fields) + # TODO is int or python lists as value supported? 
if so, adjust typing def __setitem__(self, selection: Selection, value: NDArrayLike) -> None: + """Modify data for an item or region of the array. + + Parameters + ---------- + selection : tuple + An integer index or slice or tuple of int/slice specifying the requested + region for each dimension of the array. + value : NDArrayLike + A NumPy-like array containing the data to be stored in the selection. + + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.zeros(100, dtype="uint16") + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=(10,), + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set all array elements to the same scalar value:: + + >>> z[...] = 42 + >>> z[...] + array([42, 42, 42, ..., 42, 42, 42]) + + Set a portion of the array:: + + >>> z[:10] = np.arange(10) + >>> z[-10:] = np.arange(10)[::-1] + >>> z[...] + array([ 0, 1, 2, ..., 2, 1, 0]) + + Setup a 2-dimensional array:: + + >>> data = np.zeros(25, dtype="uint16").reshape(5, 5) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set all array elements to the same scalar value:: + + >>> z[...] = 42 + + Set a portion of the array:: + + >>> z[0, :] = np.arange(z.shape[1]) + >>> z[:, 0] = np.arange(z.shape[0]) + >>> z[...] + array([[ 0, 1, 2, 3, 4], + [ 1, 42, 42, 42, 42], + [ 2, 42, 42, 42, 42], + [ 3, 42, 42, 42, 42], + [ 4, 42, 42, 42, 42]]) + + Notes + ----- + Slices with step > 1 are supported, but slices with negative step are not. + + For arrays with a structured dtype, see zarr v2 for examples of how to use + fields + + Currently the implementation for __setitem__ is provided by + :func:`vindex` if the indexing is pure fancy indexing (ie a + broadcast-compatible tuple of integer array indices), or by + :func:`set_basic_selection` otherwise. 
+ + Effectively, this means that the following indexing modes are supported: + + - integer indexing + - slice indexing + - mixed slice and integer indexing + - boolean indexing + - fancy indexing (vectorized list of integers) + + For specific indexing options including outer indexing, see the + methods listed under See Also. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__ + + """ fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): self.vindex[cast(CoordinateSelection | MaskSelection, selection)] = value From b720a36290afe980665344ec90ed88392dcaf89e Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Wed, 12 Jun 2024 18:35:48 +0200 Subject: [PATCH 08/18] add docstring for set_basic_selection --- src/zarr/array.py | 91 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 147b1ec79a..7daae21845 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1089,6 +1089,93 @@ def set_basic_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> None: + """Modify data for an item or region of the array. + + Parameters + ---------- + selection : tuple + A tuple specifying the requested item or region for each dimension of the + array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. + value : NDArrayLike + Values to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. 
If not provided, the + default buffer prototype is used. + + Examples + -------- + Setup a 1-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.zeros(100, dtype="uint16") + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set all array elements to the same scalar value:: + + >>> z.set_basic_selection(..., 42) + >>> z[...] + array([42, 42, 42, ..., 42, 42, 42]) + + Set a portion of the array:: + + >>> z.set_basic_selection(slice(10), np.arange(10)) + >>> z.set_basic_selection(slice(-10, None), np.arange(10)[::-1]) + >>> z[...] + array([ 0, 1, 2, ..., 2, 1, 0]) + + Setup a 2-dimensional array:: + + >>> data = np.zeros(25, dtype="uint16").reshape((5, 5)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set all array elements to the same scalar value:: + + >>> z.set_basic_selection(..., 42) + + Set a portion of the array:: + + >>> z.set_basic_selection((0, slice(None)), np.arange(z.shape[1])) + >>> z.set_basic_selection((slice(None), 0), np.arange(z.shape[0])) + >>> z[...] + array([[ 0, 1, 2, 3, 4], + [ 1, 42, 42, 42, 42], + [ 2, 42, 42, 42, 42], + [ 3, 42, 42, 42, 42], + [ 4, 42, 42, 42, 42]]) + + Notes + ----- + For arrays with a structured dtype, see zarr v2 for examples of how to use + the `fields` parameter. + + This method provides the underlying implementation for modifying data via square + bracket notation, see :func:`__setitem__` for equivalent examples using the + alternative notation. 
+ + See Also + -------- + get_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + set_orthogonal_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @@ -1278,7 +1365,7 @@ def get_block_selection( For arrays with a structured dtype, one or more fields can be specified to extract data for. prototype : BufferPrototype, optional - The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. Returns ------- @@ -1383,7 +1470,7 @@ def set_block_selection( >>> import zarr >>> import numpy as np - >>> data = np.zeros(0, 36, dtype="uint16").reshape((6, 6)) + >>> data = np.zeros(36, dtype="uint16").reshape((6, 6)) >>> z = Array.create( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, From 201e376bfcae3c22360b45985cddda1f001bf7ef Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 13 Jun 2024 13:19:54 +0200 Subject: [PATCH 09/18] add docstring for set and get_orthogonal_selection --- src/zarr/array.py | 207 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 206 insertions(+), 1 deletion(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 7daae21845..5e985c0299 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1187,6 +1187,113 @@ def get_orthogonal_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> NDArrayLike: + """Retrieve data by making a selection for each dimension of the array. For + example, if an array has 2 dimensions, allows selecting specific rows and/or + columns. 
The selection for each dimension can be either an integer (indexing a + single item), a slice, an array of integers, or a Boolean array where True + values indicate a selection. + + Parameters + ---------- + selection : tuple + A selection for each dimension of the array. May be any combination of int, + slice, integer array or Boolean array. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for. + prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + out : NDArrayLike + A NumPy-like array containing the data for the requested selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100).reshape(10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve rows and columns via any combination of int, slice, integer array and/or + Boolean array:: + + >>> z.get_orthogonal_selection(([1, 4], slice(None))) + array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) + >>> z.get_orthogonal_selection((slice(None), [1, 4])) + array([[ 1, 4], + [11, 14], + [21, 24], + [31, 34], + [41, 44], + [51, 54], + [61, 64], + [71, 74], + [81, 84], + [91, 94]]) + >>> z.get_orthogonal_selection(([1, 4], [1, 4])) + array([[11, 14], + [41, 44]]) + >>> sel = np.zeros(z.shape[0], dtype=bool) + >>> sel[1] = True + >>> sel[4] = True + >>> z.get_orthogonal_selection((sel, sel)) + array([[11, 14], + [41, 44]]) + + For convenience, the orthogonal selection functionality is also available via the + `oindex` property, e.g.:: + + >>> z.oindex[[1, 4], :] + array([[10, 11, 
12, 13, 14, 15, 16, 17, 18, 19], + [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) + >>> z.oindex[:, [1, 4]] + array([[ 1, 4], + [11, 14], + [21, 24], + [31, 34], + [41, 44], + [51, 54], + [61, 64], + [71, 74], + [81, 84], + [91, 94]]) + >>> z.oindex[[1, 4], [1, 4]] + array([[11, 14], + [41, 44]]) + >>> sel = np.zeros(z.shape[0], dtype=bool) + >>> sel[1] = True + >>> sel[4] = True + >>> z.oindex[sel, sel] + array([[11, 14], + [41, 44]]) + + Notes + ----- + Orthogonal indexing is also known as outer indexing. + + Slices with step > 1 are supported, but slices with negative step are not. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( @@ -1202,6 +1309,104 @@ def set_orthogonal_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> None: + """Modify data via a selection for each dimension of the array. + + Parameters + ---------- + selection : tuple + A selection for each dimension of the array. May be any combination of int, + slice, integer array or Boolean array. + value : NDArrayLike + A NumPy-like array containing the data to be stored in the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + prototype : BufferPrototype, optional + The prototype of the buffer used for setting the data. If not provided, the + default buffer prototype is used. 
+ + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.zeros(25, dtype="uint16").reshape((5, 5)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + + Set data for a selection of rows:: + + >>> z.set_orthogonal_selection(([1, 4], slice(None)), 1) + >>> z[...] + array([[0, 0, 0, 0, 0], + [1, 1, 1, 1, 1], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 1, 1]]) + + Set data for a selection of columns:: + + >>> z.set_orthogonal_selection((slice(None), [1, 4]), 2) + >>> z[...] + array([[0, 2, 0, 0, 2], + [1, 2, 1, 1, 2], + [0, 2, 0, 0, 2], + [0, 2, 0, 0, 2], + [1, 2, 1, 1, 2]]) + + Set data for a selection of rows and columns:: + + >>> z.set_orthogonal_selection(([1, 4], [1, 4]), 3) + >>> z[...] + array([[0, 2, 0, 0, 2], + [1, 3, 1, 1, 3], + [0, 2, 0, 0, 2], + [0, 2, 0, 0, 2], + [1, 3, 1, 1, 3]]) + + Set data from a 2D array:: + + >>> values = np.arange(10).reshape(2, 5) + >>> z.set_orthogonal_selection(([0, 3], ...), values) + >>> z[...] + array([[0, 1, 2, 3, 4], + [1, 3, 1, 1, 3], + [0, 2, 0, 0, 2], + [5, 6, 7, 8, 9], + [1, 3, 1, 1, 3]]) + + For convenience, this functionality is also available via the `oindex` property. + E.g.:: + + >>> z.oindex[[1, 4], [1, 4]] = 4 + >>> z[...] + array([[0, 1, 2, 3, 4], + [1, 4, 1, 1, 4], + [0, 2, 0, 0, 2], + [5, 6, 7, 8, 9], + [1, 4, 1, 1, 4]]) + + Notes + ----- + Orthogonal indexing is also known as outer indexing. + + Slices with step > 1 are supported, but slices with negative step are not. 
+ + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype) @@ -1453,7 +1658,7 @@ def set_block_selection( Parameters ---------- - selection : int or slice or tuple of int or slice + selection : tuple An integer (coordinate) or slice for each dimension of the array. value : NDArrayLike A NumPy-like array containing the data to be stored in the block selection. From 9bead38069bdf757af7bb9746356a778702bf317 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 13 Jun 2024 13:42:36 +0200 Subject: [PATCH 10/18] add docstring for set and get_mask_selection --- src/zarr/array.py | 133 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index 5e985c0299..3bdfc805ee 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1420,6 +1420,71 @@ def get_mask_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> NDArrayLike: + """Retrieve a selection of individual items, by providing a Boolean array of the + same shape as the array against which the selection is being made, where True + values indicate a selected item. + + Parameters + ---------- + mask : ndarray, bool + A Boolean array of the same shape as the array against which the selection is + being made. + out : NDBuffer, optional + If given, load the selected data directly into this buffer. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to + extract data for.
+ prototype : BufferPrototype, optional + The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. + + Returns + ------- + out : NDArrayLike + A NumPy-like array containing the data for the requested selection. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.arange(100).reshape(10, 10) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Retrieve items by specifying a mask:: + + >>> sel = np.zeros_like(z, dtype=bool) + >>> sel[1, 1] = True + >>> sel[4, 4] = True + >>> z.get_mask_selection(sel) + array([11, 44]) + + For convenience, the mask selection functionality is also available via the + `vindex` property, e.g.:: + + >>> z.vindex[sel] + array([11, 44]) + + Notes + ----- + Mask indexing is a form of vectorized or inner indexing, and is equivalent to + coordinate indexing. Internally the mask array is converted to coordinate + arrays by calling `np.nonzero`. + + See Also + -------- + get_basic_selection, set_basic_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + """ + indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( @@ -1435,6 +1500,74 @@ def set_mask_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> None: + """Modify a selection of individual items, by providing a Boolean array of the + same shape as the array against which the selection is being made, where True + values indicate a selected item. 
+ + Parameters + ---------- + selection : ndarray, bool + A Boolean array of the same shape as the array against which the selection is + being made. + value : scalar or array-like + Value to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.zeros(25, dtype="uint16").reshape((5, 5)) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set data for a selection of items:: + + >>> sel = np.zeros_like(z, dtype=bool) + >>> sel[1, 1] = True + >>> sel[4, 4] = True + >>> z.set_mask_selection(sel, 1) + >>> z[...] + array([[0, 0, 0, 0, 0], + [0, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 1]]) + + For convenience, this functionality is also available via the `vindex` property. + E.g.:: + + >>> z.vindex[sel] = 2 + >>> z[...] + array([[0, 0, 0, 0, 0], + [0, 2, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 2]]) + + Notes + ----- + Mask indexing is a form of vectorized or inner indexing, and is equivalent to + coordinate indexing. Internally the mask array is converted to coordinate + arrays by calling `np.nonzero`. 
+ + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + set_coordinate_selection, get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) From 9845cd33ba01dfd96bb79b66977b346a27cff3ff Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 13 Jun 2024 13:50:05 +0200 Subject: [PATCH 11/18] add docstring for set_coordinate_selection --- src/zarr/array.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index 3bdfc805ee..bf2a020970 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1665,6 +1665,71 @@ def set_coordinate_selection( fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, ) -> None: + """Modify a selection of individual items, by providing the indices (coordinates) + for each item to be modified. + + Parameters + ---------- + selection : tuple + An integer (coordinate) array for each dimension of the array. + value : scalar or array-like + Value to be stored into the array. + fields : str or sequence of str, optional + For arrays with a structured dtype, one or more fields can be specified to set + data for. + + Examples + -------- + Setup a 2-dimensional array:: + + >>> import zarr + >>> import numpy as np + >>> data = np.zeros(0, 25, dtype="uint16").reshape(5, 5) + >>> z = Array.create( + >>> StorePath(MemoryStore(mode="w")), + >>> shape=data.shape, + >>> chunk_shape=data.shape, + >>> dtype=data.dtype, + >>> ) + >>> z[:] = data + + Set data for a selection of items:: + + >>> z.set_coordinate_selection(([1, 4], [1, 4]), 1) + >>> z[...] 
+ array([[0, 0, 0, 0, 0], + [0, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 1]]) + + For convenience, this functionality is also available via the `vindex` property. + E.g.:: + + >>> z.vindex[[1, 4], [1, 4]] = 2 + >>> z[...] + array([[0, 0, 0, 0, 0], + [0, 2, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 2]]) + + Notes + ----- + Coordinate indexing is also known as point selection, and is a form of vectorized + or inner indexing. + + Slices are not supported. Coordinate arrays must be provided for all dimensions + of the array. + + See Also + -------- + get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection, + get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection, + get_block_selection, set_block_selection, + vindex, oindex, blocks, __getitem__, __setitem__ + + """ # setup indexer indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) From 6b31189e9893481717a1ea49d9620408035e5310 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 13 Jun 2024 14:06:13 +0200 Subject: [PATCH 12/18] add docstring for oindex and vindex --- src/zarr/array.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index bf2a020970..25c4e018b1 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1935,10 +1935,15 @@ def set_block_selection( @property def vindex(self) -> VIndex: + """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`, + :func:`set_coordinate_selection`, :func:`get_mask_selection` and + :func:`set_mask_selection` for documentation and examples.""" return VIndex(self) @property def oindex(self) -> OIndex: + """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and + :func:`set_orthogonal_selection` for documentation and examples.""" return OIndex(self) @property From 7531bb35d5d5536712d7befd336d2c11efb707db Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 14 Jun 
2024 10:13:04 +0200 Subject: [PATCH 13/18] ruff formatting --- src/zarr/array.py | 2 +- tests/v3/test_codecs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 25c4e018b1..7b1aa8b54e 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -60,7 +60,7 @@ pop_fields, ) from zarr.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata -from zarr.store import StoreLike, StorePath, make_store_path, MemoryStore +from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import sync diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index 1ce03ed0b0..7cb0d0f804 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -22,7 +22,7 @@ ZstdCodec, ) from zarr.config import config -from zarr.indexing import morton_order_iter, Selection +from zarr.indexing import Selection, morton_order_iter from zarr.store import MemoryStore, StorePath from zarr.testing.utils import assert_bytes_equal From b2dcc188ac7b3c0b441574e241cc5d4047a395fb Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 14 Jun 2024 13:28:49 +0200 Subject: [PATCH 14/18] setting input interfaces values to npt.ArrayLike --- src/zarr/api/asynchronous.py | 2 +- src/zarr/array.py | 79 +++++++++++++++++++----------------- src/zarr/buffer.py | 2 + src/zarr/indexing.py | 8 ++-- 4 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 52d07fb6fe..6cf7378bfa 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -322,7 +322,7 @@ async def tree(*args: Any, **kwargs: Any) -> None: raise NotImplementedError -async def array(data: NDArrayLike, **kwargs: Any) -> AsyncArray: +async def array(data: npt.ArrayLike, **kwargs: Any) -> AsyncArray: """Create an array filled with `data`. 
Parameters diff --git a/src/zarr/array.py b/src/zarr/array.py index 7b1aa8b54e..65dd1bfad8 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -477,7 +477,7 @@ async def _save_metadata(self, metadata: ArrayMetadata) -> None: async def _set_selection( self, indexer: Indexer, - value: NDArrayLike, + value: npt.ArrayLike, *, prototype: BufferPrototype, fields: Fields | None = None, @@ -495,8 +495,11 @@ async def _set_selection( # assert ( # value.shape == indexer.shape # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}" - if value.dtype.name != self.metadata.dtype.name: - value = value.astype(self.metadata.dtype, order="A") + if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name: + value = np.array(value, dtype=self.metadata.dtype, order="A") + + #now the value should be ndarray like + assert isinstance(value, NDArrayLike) # We accept any ndarray like object from the user and convert it # to a NDBuffer (or subclass). From this point onwards, we only pass @@ -521,7 +524,7 @@ async def _set_selection( async def setitem( self, selection: BasicSelection, - value: NDArrayLike, + value: npt.ArrayLike, prototype: BufferPrototype = default_buffer_prototype, ) -> None: indexer = BasicIndexer( @@ -712,8 +715,8 @@ def __getitem__(self, selection: Selection) -> NDArrayLike: Returns ------- - out : NDArrayLike - A NumPy-like array containing the data for the requested region. + NDArrayLike + An array-like containing the data for the requested region. Examples -------- @@ -852,8 +855,7 @@ def __getitem__(self, selection: Selection) -> NDArrayLike: else: return self.get_basic_selection(cast(BasicSelection, pure_selection), fields=fields) - # TODO is int or python lists as value supported? if so, adjust typing - def __setitem__(self, selection: Selection, value: NDArrayLike) -> None: + def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: """Modify data for an item or region of the array. 
Parameters @@ -861,8 +863,8 @@ def __setitem__(self, selection: Selection, value: NDArrayLike) -> None: selection : tuple An integer index or slice or tuple of int/slice specifying the requested region for each dimension of the array. - value : NDArrayLike - A NumPy-like array containing the data to be stored in the selection. + value : npt.ArrayLike + An array-like containing the data to be stored in the selection. Examples -------- @@ -982,8 +984,8 @@ def get_basic_selection( Returns ------- - out : NDArrayLike - A NumPy-like array containing the data for the requested region. + NDArrayLike + An array-like containing the data for the requested region. Examples -------- @@ -1084,7 +1086,7 @@ def get_basic_selection( def set_basic_selection( self, selection: BasicSelection, - value: NDArrayLike, + value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, @@ -1096,8 +1098,8 @@ def set_basic_selection( selection : tuple A tuple specifying the requested item or region for each dimension of the array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. - value : NDArrayLike - Values to be stored into the array. + value : npt.ArrayLike + An array-like containing values to be stored into the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. @@ -1208,8 +1210,8 @@ def get_orthogonal_selection( Returns ------- - out : NDArrayLike - A NumPy-like array containing the data for the requested selection. + NDArrayLike + An array-like containing the data for the requested selection. 
Examples -------- @@ -1304,7 +1306,7 @@ def get_orthogonal_selection( def set_orthogonal_selection( self, selection: OrthogonalSelection, - value: NDArrayLike, + value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, @@ -1316,8 +1318,8 @@ def set_orthogonal_selection( selection : tuple A selection for each dimension of the array. May be any combination of int, slice, integer array or Boolean array. - value : NDArrayLike - A NumPy-like array containing the data to be stored in the array. + value : npt.ArrayLike + An array-like containing the data to be stored in the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. @@ -1439,8 +1441,8 @@ def get_mask_selection( Returns ------- - out : NDArrayLike - A NumPy-like array containing the data for the requested selection. + NDArrayLike + An array-like containing the data for the requested selection. Examples -------- @@ -1495,7 +1497,7 @@ def get_mask_selection( def set_mask_selection( self, mask: MaskSelection, - value: NDArrayLike, + value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, @@ -1509,8 +1511,8 @@ def set_mask_selection( selection : ndarray, bool A Boolean array of the same shape as the array against which the selection is being made. - value : scalar or array-like - Value to be stored into the array. + value : npt.ArrayLike + An array-like containing values to be stored into the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for.
@@ -1578,7 +1580,7 @@ def get_coordinate_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, - ) -> NDArrayLike: + ) -> NDArrayLike : """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. @@ -1596,8 +1598,8 @@ def get_coordinate_selection( Returns ------- - out : NDArrayLike - A NumPy-like array containing the data for the requested coordinate selection. + NDArrayLike + An array-like containing the data for the requested coordinate selection. Examples -------- @@ -1653,14 +1655,15 @@ def get_coordinate_selection( ) ) - # restore shape - out_array = out_array.reshape(indexer.sel_shape) + if(hasattr(out_array, "shape")): + # restore shape + out_array = np.array(out_array).reshape(indexer.sel_shape) return out_array def set_coordinate_selection( self, selection: CoordinateSelection, - value: NDArrayLike, + value: npt.ArrayLike , *, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, @@ -1672,8 +1675,8 @@ def set_coordinate_selection( ---------- selection : tuple An integer (coordinate) array for each dimension of the array. - value : scalar or array-like - Value to be stored into the array. + value : npt.ArrayLike + An array-like containing values to be stored into the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. @@ -1743,7 +1746,7 @@ def set_coordinate_selection( # Handle types like `list` or `tuple` value = np.array(value) # TODO replace with agnostic if hasattr(value, "shape") and len(value.shape) > 1: - value = value.reshape(-1) + value = np.array(value).reshape(-1) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @@ -1773,7 +1776,7 @@ def get_block_selection( Returns ------- NDArrayLike - A NumPy-like array containing the data for the requested block selection. 
+ An array-like containing the data for the requested block selection. Examples -------- @@ -1846,7 +1849,7 @@ def get_block_selection( def set_block_selection( self, selection: BlockSelection, - value: NDArrayLike, + value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, @@ -1858,8 +1861,8 @@ def set_block_selection( ---------- selection : tuple An integer (coordinate) or slice for each dimension of the array. - value : NDArrayLike - A NumPy-like array containing the data to be stored in the block selection. + value : npt.ArrayLike + An array-like containing the data to be stored in the block selection. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. diff --git a/src/zarr/buffer.py b/src/zarr/buffer.py index 81de5a6675..44691ea352 100644 --- a/src/zarr/buffer.py +++ b/src/zarr/buffer.py @@ -64,6 +64,8 @@ def __getitem__(self, key: slice) -> Self: ... def __setitem__(self, key: slice, value: Any) -> None: ... + def __array__(self) -> npt.NDArray[Any]: ... + def reshape( self, shape: ChunkCoords | Literal[-1], *, order: Literal["A", "C", "F"] = ... ) -> Self: ... 
diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 069dad5804..1546f2d222 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -23,11 +23,11 @@ import numpy as np import numpy.typing as npt +from zarr.buffer import NDArrayLike from zarr.common import ChunkCoords, product if TYPE_CHECKING: from zarr.array import Array - from zarr.buffer import NDArrayLike from zarr.chunk_grids import ChunkGrid IntOrSlice = int | slice @@ -836,7 +836,7 @@ def __getitem__(self, selection: OrthogonalSelection) -> NDArrayLike: cast(OrthogonalSelection, new_selection), fields=fields ) - def __setitem__(self, selection: OrthogonalSelection, value: NDArrayLike) -> None: + def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) @@ -936,7 +936,7 @@ def __getitem__(self, selection: BlockSelection) -> NDArrayLike: new_selection = replace_lists(new_selection) return self.array.get_block_selection(cast(BlockSelection, new_selection), fields=fields) - def __setitem__(self, selection: BlockSelection, value: NDArrayLike) -> None: + def __setitem__(self, selection: BlockSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) @@ -1128,7 +1128,7 @@ def __getitem__(self, selection: CoordinateSelection | MaskSelection) -> NDArray raise VindexInvalidSelectionError(new_selection) def __setitem__( - self, selection: CoordinateSelection | MaskSelection, value: NDArrayLike + self, selection: CoordinateSelection | MaskSelection, value: npt.ArrayLike ) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) From 0e8cb26a6921bc904a8a5445ca3a05e32ca631d8 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 14 Jun 2024 16:41:04 +0200 Subject: [PATCH 15/18] improve 
typing --- src/zarr/array.py | 10 +++------- src/zarr/indexing.py | 26 ++++++++++++-------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 65dd1bfad8..eed86c1126 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -40,7 +40,6 @@ BasicSelection, BlockIndex, BlockIndexer, - BlockSelection, CoordinateIndexer, CoordinateSelection, Fields, @@ -497,10 +496,7 @@ async def _set_selection( # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}" if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name: value = np.array(value, dtype=self.metadata.dtype, order="A") - - #now the value should be ndarray like - assert isinstance(value, NDArrayLike) - + value = cast(NDArrayLike, value) # We accept any ndarray like object from the user and convert it # to a NDBuffer (or subclass). From this point onwards, we only pass # Buffer and NDBuffer between components. @@ -1752,7 +1748,7 @@ def set_coordinate_selection( def get_block_selection( self, - selection: BlockSelection, + selection: BasicSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, @@ -1848,7 +1844,7 @@ def get_block_selection( def set_block_selection( self, - selection: BlockSelection, + selection: BasicSelection, value: npt.ArrayLike, *, fields: Fields | None = None, diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 1546f2d222..80a4170be6 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -30,22 +30,20 @@ from zarr.array import Array from zarr.chunk_grids import ChunkGrid -IntOrSlice = int | slice -IntOrSliceOrEllipsis = IntOrSlice | EllipsisType IntSequence = list[int] | npt.NDArray[np.intp] -IntOrSliceOrArray = IntOrSlice | npt.NDArray[np.intp] | npt.NDArray[np.bool_] +ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] +BasicSelector = int | slice | EllipsisType +Selector = BasicSelector | ArrayOfIntOrBool -BasicSelection = 
IntOrSliceOrEllipsis | tuple[IntOrSliceOrEllipsis, ...] +BasicSelection = BasicSelector | tuple[BasicSelector, ...] # also used for BlockIndex CoordinateSelection = IntSequence | tuple[IntSequence, ...] -BlockSelection = IntOrSlice | tuple[IntOrSlice, ...] MaskSelection = npt.NDArray[np.bool_] -OrthogonalSelection = IntOrSliceOrArray | tuple[IntOrSliceOrArray, ...] +OrthogonalSelection = Selector | tuple[Selector, ...] Selection = ( - BasicSelection | CoordinateSelection | BlockSelection | MaskSelection | OrthogonalSelection + BasicSelection | CoordinateSelection | MaskSelection | OrthogonalSelection ) CoordinateSelectionNormalized = tuple[npt.NDArray[np.intp], ...] -SelectionNormalized = tuple[IntOrSliceOrArray, ...] | CoordinateSelectionNormalized | MaskSelection -Selector = IntOrSlice | npt.NDArray[np.intp] | npt.NDArray[np.bool_] +SelectionNormalized = tuple[Selector, ...] | ArrayOfIntOrBool SelectionWithFields = Selection | str | Sequence[str] SelectorTuple = tuple[Selector, ...] | npt.NDArray[np.intp] | slice Fields = str | list[str] | tuple[str, ...] 
@@ -851,7 +849,7 @@ class BlockIndexer(Indexer): shape: ChunkCoords drop_axes: ChunkCoords - def __init__(self, selection: BlockSelection, shape: ChunkCoords, chunk_grid: ChunkGrid): + def __init__(self, selection: BasicSelection, shape: ChunkCoords, chunk_grid: ChunkGrid): chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis @@ -930,18 +928,18 @@ def __iter__(self) -> Iterator[ChunkProjection]: class BlockIndex: array: Array - def __getitem__(self, selection: BlockSelection) -> NDArrayLike: + def __getitem__(self, selection: BasicSelection) -> NDArrayLike: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) - return self.array.get_block_selection(cast(BlockSelection, new_selection), fields=fields) + return self.array.get_block_selection(cast(BasicSelection, new_selection), fields=fields) - def __setitem__(self, selection: BlockSelection, value: npt.ArrayLike) -> None: + def __setitem__(self, selection: BasicSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) return self.array.set_block_selection( - cast(BlockSelection, new_selection), value, fields=fields + cast(BasicSelection, new_selection), value, fields=fields ) From 9e6fe4d18fe5ea43d4ecd16e482e9c5654eb2622 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 14 Jun 2024 17:23:46 +0200 Subject: [PATCH 16/18] improve docstring examples --- src/zarr/array.py | 118 +++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 70 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index eed86c1126..e42114c189 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -867,15 +867,12 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: Setup a 1-dimensional array:: >>> import zarr - >>> import numpy as np - >>> data = np.zeros(100, dtype="uint16") - 
>>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=(2, 2), - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = zarr.zeros( + >>> shape=(100,), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5,), + >>> dtype="i4", + >>> ) Set all array elements to the same scalar value:: @@ -892,14 +889,12 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: Setup a 2-dimensional array:: - >>> data = np.zeros(0, 25, dtype="uint16").reshape(5, 5) - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=data.shape, - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) Set all array elements to the same scalar value:: @@ -989,7 +984,7 @@ def get_basic_selection( >>> import zarr >>> import numpy as np - >>> data = np.arange(0, 100, dtype="uint16") + >>> data = np.arange(100, dtype="uint16") >>> z = Array.create( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, @@ -1108,15 +1103,12 @@ def set_basic_selection( Setup a 1-dimensional array:: >>> import zarr - >>> import numpy as np - >>> data = np.zeros(0, 100, dtype="uint16") - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=data.shape, - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = zarr.zeros( + >>> shape=(100,), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(100,), + >>> dtype="i4", + >>> ) Set all array elements to the same scalar value:: @@ -1133,14 +1125,12 @@ def set_basic_selection( Setup a 2-dimensional array:: - >>> data = np.zeros(25, dtype="uint16").reshape((5, 5)) - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=data.shape, - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = 
zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) Set all array elements to the same scalar value:: @@ -1328,15 +1318,12 @@ def set_orthogonal_selection( Setup a 2-dimensional array:: >>> import zarr - >>> import numpy as np - >>> data = np.zeros(25, dtype="uint16").reshape((5, 5)) - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=data.shape, - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) Set data for a selection of rows:: @@ -1518,15 +1505,12 @@ def set_mask_selection( Setup a 2-dimensional array:: >>> import zarr - >>> import numpy as np - >>> data = np.zeros(25, dtype="uint16").reshape((5, 5)) - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=data.shape, - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) Set data for a selection of items:: @@ -1682,15 +1666,12 @@ def set_coordinate_selection( Setup a 2-dimensional array:: >>> import zarr - >>> import numpy as np - >>> data = np.zeros(0, 25, dtype="uint16").reshape(5, 5) - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunk_shape=data.shape, - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> z = zarr.zeros( + >>> shape=(5, 5), + >>> store=StorePath(MemoryStore(mode="w")), + >>> chunk_shape=(5, 5), + >>> dtype="i4", + >>> ) Set data for a selection of items:: @@ -1871,15 +1852,12 @@ def set_block_selection( Set up a 2-dimensional array:: >>> import zarr - >>> import numpy as np - >>> data = np.zeros(36, dtype="uint16").reshape((6, 6)) - >>> z = Array.create( - >>> StorePath(MemoryStore(mode="w")), - 
>>> shape=data.shape, + >>> z = zarr.zeros( + >>> shape=(6, 6), + >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(2, 2), - >>> dtype=data.dtype, - >>> ) - >>> z[:] = data + >>> dtype="i4", + >>> ) Set data for a selection of items:: From 579fb8f683af33c2813c7fe9106ce8a0878f203d Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 14 Jun 2024 17:48:05 +0200 Subject: [PATCH 17/18] add docstring for Array.resize --- src/zarr/array.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index e42114c189..9138a58115 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1930,6 +1930,42 @@ def blocks(self) -> BlockIndex: return BlockIndex(self) def resize(self, new_shape: ChunkCoords) -> Array: + """ + Change the shape of the array by growing or shrinking one or more + dimensions. + + This method does not modify the original Array object. Instead, it returns a new Array + with the specified shape. + + Examples + -------- + >>> import zarr + >>> z = zarr.zeros(shape=(10000, 10000), + >>> chunk_shape=(1000, 1000), + >>> store=StorePath(MemoryStore(mode="w")), + >>> dtype="i4",) + >>> z.shape + (10000, 10000) + >>> z = z.resize((20000, 1000)) + >>> z.shape + (20000, 1000) + >>> z2 = z.resize((50, 50)) + >>> z.shape + (20000, 1000) + >>> z2.shape + (50, 50) + + Notes + ----- + When resizing an array, the data are not rearranged in any way. + + If one or more dimensions are shrunk, any chunks falling outside the + new array shape will be deleted from the underlying store. + However, it is noteworthy that the chunks partially falling inside the new array + (i.e. boundary chunks) will remain intact, and therefore, + the data falling outside the new array but inside the boundary chunks + would be restored by a subsequent resize operation that grows the array size.
+ """ return type(self)( sync( self._async_array.resize(new_shape), From 87d9d587705491c8a1f8c3f7a1a7baa2fb2007b9 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 14 Jun 2024 17:57:31 +0200 Subject: [PATCH 18/18] ruff format --- src/zarr/array.py | 6 +++--- src/zarr/indexing.py | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 9138a58115..3b5ecce8ee 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -1560,7 +1560,7 @@ def get_coordinate_selection( out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, - ) -> NDArrayLike : + ) -> NDArrayLike: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. @@ -1635,7 +1635,7 @@ def get_coordinate_selection( ) ) - if(hasattr(out_array, "shape")): + if hasattr(out_array, "shape"): # restore shape out_array = np.array(out_array).reshape(indexer.sel_shape) return out_array @@ -1643,7 +1643,7 @@ def get_coordinate_selection( def set_coordinate_selection( self, selection: CoordinateSelection, - value: npt.ArrayLike , + value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype = default_buffer_prototype, diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 80a4170be6..74cbbe8c6b 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -39,9 +39,7 @@ CoordinateSelection = IntSequence | tuple[IntSequence, ...] MaskSelection = npt.NDArray[np.bool_] OrthogonalSelection = Selector | tuple[Selector, ...] -Selection = ( - BasicSelection | CoordinateSelection | MaskSelection | OrthogonalSelection -) +Selection = BasicSelection | CoordinateSelection | MaskSelection | OrthogonalSelection CoordinateSelectionNormalized = tuple[npt.NDArray[np.intp], ...] SelectionNormalized = tuple[Selector, ...] | ArrayOfIntOrBool SelectionWithFields = Selection | str | Sequence[str]