From cdb21e398aa8588c96278fe71ed77ca14e0b7169 Mon Sep 17 00:00:00 2001 From: Emma Marshall <55526386+e-marshall@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:14:50 -0600 Subject: [PATCH 1/6] start to docstrings for arraymodule --- src/zarr/core/array.py | 316 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e1de15c747..1ab18d634e 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -161,6 +161,77 @@ async def get_array_metadata( @dataclass(frozen=True) class AsyncArray: + """ + An asynchronous array class representing a chunked array stored in a Zarr store. + + Parameters + ---------- + metadata : ArrayMetadata + The metadata of the array. + store_path : StorePath + The path to the Zarr store. + codec_pipeline : CodecPipeline, optional + The codec pipeline used for encoding and decoding chunks, by default None. + order : {'C', 'F'}, optional + The order of the array data in memory, by default None. + + Attributes + ---------- + metadata : ArrayMetadata + The metadata of the array. + store_path : StorePath + The path to the Zarr store. + codec_pipeline : CodecPipeline + The codec pipeline used for encoding and decoding chunks. + order : {'C', 'F'} + The order of the array data in memory. + + Methods + ------- + create(store, ...) + Create a new AsyncArray in the given store. + open(store, ...) + Open an existing AsyncArray from the given store. + resize(new_shape, ...) + Resize the AsyncArray to the specified shape. + update_attributes(new_attributes, ...) + Update the attributes of the AsyncArray. + info() + Print information about the AsyncArray. + + Properties + ---------- + ndim : int + The number of dimensions of the array. + shape : Tuple[int] + The shape of the array. + chunks : Tuple[int] + The shape of the chunks in the array. + size : int + The total number of elements in the array. + dtype : numpy.dtype + The data type of the array. + attrs : dict + The attributes of the array. + read_only : bool + Whether the array is read-only. + path : str + The path to the array in the Zarr store. + name : str or None + The name of the array. + basename : str or None + The basename of the array. + cdata_shape : Tuple[int] + The shape of the array in chunk grid coordinates. + nchunks : int + The total number of chunks in the array. + nchunks_initialized : int + The number of initialized chunks in the array. + nbytes : int + The total number of bytes used by the array data. + + """ + metadata: ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) @@ -218,6 +289,69 @@ async def create( exists_ok: bool = False, data: npt.ArrayLike | None = None, ) -> AsyncArray: + """ + Method to create a new asynchronous array instance. + + Parameters + ---------- + store : StoreLike + The store where the array will be created. + shape : ShapeLike + The shape of the array. + dtype : npt.DTypeLike + The data type of the array. + zarr_format : ZarrFormat, optional + The Zarr format version (default is 3). + fill_value : Any, optional + The fill value of the array (default is None). + attributes : dict[str, JSON], optional + The attributes of the array (default is None). + chunk_shape : ChunkCoords, optional + The shape of the array's chunks (default is None). + chunk_key_encoding : ChunkKeyEncoding, optional + The chunk key encoding (default is None). + codecs : Iterable[Codec | dict[str, JSON]], optional + The codecs used to encode the data (default is None). + dimension_names : Iterable[str], optional + The names of the dimensions (default is None). + chunks : ShapeLike, optional + The shape of the array's chunks (default is None). + V2 only. V3 arrays should not have 'chunks' parameter. + dimension_separator : Literal[".", "/"], optional + The dimension separator (default is None). + V2 only. V3 arrays cannot have a dimension separator. + order : Literal["C", "F"], optional + The order of the array (default is None). + V2 only. V3 arrays should not have 'order' parameter. + filters : list[dict[str, JSON]], optional + The filters used to compress the data (default is None). + V2 only. V3 arrays should not have 'filters' parameter. + compressor : dict[str, JSON], optional + The compressor used to compress the data (default is None). + V2 only. V3 arrays should not have 'compressor' parameter. + exists_ok : bool, optional + Whether to raise an error if the store already exists (default is False). + data : npt.ArrayLike, optional + The data to be inserted into the array (default is None). + + Returns + ------- + AsyncArray + The created asynchronous array instance. + + Examples + -------- + >>> import zarr + >>> store = zarr.store.MemoryStore(mode='w') + >>> async_arr = await zarr.core.array.AsyncArray.create( + >>> store=store, + >>> shape=(100,100), + >>> chunks=(10,10), + >>> dtype='i4', + >>> fill_value=0) + + + """ store_path = await make_store_path(store) shape = parse_shapelike(shape) @@ -400,20 +534,66 @@ async def open( store: StoreLike, zarr_format: ZarrFormat | None = 3, ) -> AsyncArray: + """ + Async method to open an existing Zarr array from a given store. + + Parameters + ---------- + store : StoreLike + The store containing the Zarr array. + zarr_format : ZarrFormat | None, optional + The Zarr format version (default is 3). + + Returns + ------- + AsyncArray + The opened Zarr array. + + Examples + -------- + >>> import zarr + >>> store = zarr.store.MemoryStore(mode='w') + >>> async_arr = await AsyncArray.open(store) + + """ store_path = await make_store_path(store) metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) return cls(store_path=store_path, metadata=metadata_dict) @property def ndim(self) -> int: + """Returns the number of dimensions in the Array. + + Returns + ------- + int + The number of dimensions in the Array. + """ return len(self.metadata.shape) @property def shape(self) -> ChunkCoords: + """Returns the shape of the Array. + + Returns + ------- + tuple + The shape of the Array. + """ return self.metadata.shape @property def chunks(self) -> ChunkCoords: + """Returns the chunk shape of the Array. + + Only defined for arrays using using `RegularChunkGrid`. + If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised. + + Returns + ------- + ChunkCoords: + The chunk shape of the Array. + """ if isinstance(self.metadata.chunk_grid, RegularChunkGrid): return self.metadata.chunk_grid.chunk_shape @@ -618,6 +798,36 @@ async def getitem( *, prototype: BufferPrototype | None = None, ) -> NDArrayLike: + """ + Asynchronous function that retrieves a subset of the array's data based on the provided selection. + + Parameters + ---------- + selection : BasicSelection + A selection object specifying the subset of data to retrieve. + prototype : BufferPrototype, optional + A buffer prototype to use for the retrieved data (default is None). + + Returns + ------- + NDArrayLike + The retrieved subset of the array's data. + + Examples + -------- + >>> import zarr + >>> store = zarr.store.MemoryStore(mode='w') + >>> async_arr = await zarr.core.array.AsyncArray.create( + >>> store=store, + >>> shape=(100,100), + >>> chunks=(10,10), + >>> dtype='i4', + >>> fill_value=0) + + >>> await async_arr.getitem((0,1)) + array(0, dtype=int32) + + """ if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer( @@ -759,6 +969,18 @@ async def info(self) -> None: @dataclass(frozen=True) class Array: + """Instantiate an array from an initialized store. + + Parameters + ---------- + store : StoreLike + The array store that has already been initialized. + shape : ChunkCoords + The shape of the array. + dtype : npt.DTypeLike + The dtype of the array. + """ + _async_array: AsyncArray @classmethod @@ -792,6 +1014,42 @@ def create( # runtime exists_ok: bool = False, ) -> Array: + """Creates a new Array instance from an initialized store. + + Parameters + ---------- + store : StoreLike + The array store that has already been initialized. + shape : ChunkCoords + The shape of the array. + dtype : npt.DTypeLike + The data type of the array. + chunk_shape : ChunkCoords, optional + The shape of the Array's chunks (default is None). + chunk_key_encoding : ChunkKeyEncoding, optional + The chunk key encoding (default is None). + codecs : Iterable[Codec | dict[str, JSON]], optional + The codecs used to encode the data (default is None). + dimension_names : Iterable[str], optional + The names of the dimensions (default is None). + chunks : ChunkCoords, optional + The shape of the Array's chunks (default is None). + dimension_separator : Literal[".", "/"], optional + The dimension separator (default is None). + order : Literal["C", "F"], optional + The order of the array (default is None). + filters : list[dict[str, JSON]], optional + The filters used to compress the data (default is None). + compressor : dict[str, JSON], optional + The compressor used to compress the data (default is None). + exists_ok : bool, optional + Whether to raise an error if the store already exists (default is False). + + Returns + ------- + Array + Array created from the store. + """ async_array = sync( AsyncArray.create( store=store, @@ -828,31 +1086,89 @@ def open( cls, store: StoreLike, ) -> Array: + """Opens an existing Array from a store. + + Parameters + ---------- + store : Store + Store containing the Array. + + Returns + ------- + Array + Array opened from the store. + """ async_array = sync(AsyncArray.open(store)) return cls(async_array) @property def ndim(self) -> int: + """Returns the number of dimensions in the array. + + Returns + ------- + int + The number of dimensions in the array. + """ return self._async_array.ndim @property def shape(self) -> ChunkCoords: + """Returns the shape of the array. + + Returns + ------- + ChunkCoords + The shape of the array. + """ return self._async_array.shape @property def chunks(self) -> ChunkCoords: + """Returns a tuple of integers describing the length of each dimension of a chunk of the array. + + Returns + ------- + tuple + A tuple of integers representing the length of each dimension of a chunk. + """ return self._async_array.chunks @property def size(self) -> int: + """Returns the total number of elements in the array. + + Returns + ------- + int + Total number of elements in the array. + """ return self._async_array.size @property def dtype(self) -> np.dtype[Any]: + """Returns the NumPy data type. + + Returns + ------- + np.dtype + The NumPy data type. + """ return self._async_array.dtype @property def attrs(self) -> Attributes: + """Returns a MutableMapping containing user-defined attributes. + + Returns + ------- + attrs : MutableMapping + A MutableMapping object containing user-defined attributes. + + Notes + ----- + Note that attribute values must be JSON serializable. + """ return Attributes(self) @property From 461b575745b492ef662d5e42d6c62cdee5aa89d8 Mon Sep 17 00:00:00 2001 From: Emma Marshall <55526386+e-marshall@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:41:38 -0600 Subject: [PATCH 2/6] incorporating toms edits, overriding mypy error... --- src/zarr/core/array.py | 153 +++++++++++++++++++++++++---------------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 1ab18d634e..99344a5b0d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -186,50 +186,22 @@ class AsyncArray: order : {'C', 'F'} The order of the array data in memory. - Methods - ------- - create(store, ...) - Create a new AsyncArray in the given store. - open(store, ...) - Open an existing AsyncArray from the given store. - resize(new_shape, ...) - Resize the AsyncArray to the specified shape. - update_attributes(new_attributes, ...) - Update the attributes of the AsyncArray. - info() - Print information about the AsyncArray. - Properties ---------- - ndim : int - The number of dimensions of the array. - shape : Tuple[int] - The shape of the array. - chunks : Tuple[int] - The shape of the chunks in the array. - size : int - The total number of elements in the array. - dtype : numpy.dtype - The data type of the array. - attrs : dict - The attributes of the array. - read_only : bool - Whether the array is read-only. - path : str - The path to the array in the Zarr store. - name : str or None - The name of the array. - basename : str or None - The basename of the array. - cdata_shape : Tuple[int] - The shape of the array in chunk grid coordinates. - nchunks : int - The total number of chunks in the array. - nchunks_initialized : int - The number of initialized chunks in the array. - nbytes : int - The total number of bytes used by the array data. - + ndim + shape + chunks + size + dtype + attrs + read_only + path + name + basename + cdata_shape + nchunks + nchunks_initialized + nbytes """ metadata: ArrayMetadata @@ -552,9 +524,9 @@ async def open( Examples -------- >>> import zarr - >>> store = zarr.store.MemoryStore(mode='w') - >>> async_arr = await AsyncArray.open(store) - + >>> store = zarr.store.MemoryStore(mode='w') + >>> async_arr = await AsyncArray.open(store) # doctest: +ELLIPSIS + """ store_path = await make_store_path(store) metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) @@ -605,28 +577,72 @@ def chunks(self) -> ChunkCoords: @property def size(self) -> int: + """Returns the total number of elements in the array + + Returns + ------- + int + Total number of elements in the array + """ return np.prod(self.metadata.shape).item() @property def dtype(self) -> np.dtype[Any]: + """Returns the data type of the array. + + Returns + ------- + np.dtype + Data type of the array + """ return self.metadata.dtype @property def attrs(self) -> dict[str, JSON]: - return self.metadata.attributes + """Returns the attributes of the array. + + Returns + ------- + dict + Attributes of the array + """ + if self.metadata.attributes is None: + return {} + else: + return self.metadata.attributes @property def read_only(self) -> bool: + """Returns True if the array is read-only. + + Returns + ------- + bool + True if the array is read-only + """ + # Backwards compatibility for 2.x return self.store_path.store.mode.readonly @property def path(self) -> str: - """Storage path.""" + """Storage path. + + Returns + ------- + str + The path to the array in the Zarr store. + """ return self.store_path.path @property def name(self) -> str | None: - """Array name following h5py convention.""" + """Array name following h5py convention. + + Returns + ------- + str + The name of the array. + """ if self.path: # follow h5py convention: add leading slash name = self.path @@ -637,7 +653,13 @@ def name(self) -> str | None: @property def basename(self) -> str | None: - """Final component of name.""" + """Final component of name. + + Returns + ------- + str + The basename or final component of the array name. + """ if self.name is not None: return self.name.split("/")[-1] return None @@ -646,6 +668,11 @@ def basename(self) -> str | None: def cdata_shape(self) -> ChunkCoords: """ The shape of the chunk grid for this array. + + Returns + ------- + Tuple[int] + The shape of the chunk grid for this array. """ return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) @@ -653,6 +680,11 @@ def cdata_shape(self) -> ChunkCoords: def nchunks(self) -> int: """ The number of chunks in the stored representation of this array. + + Returns + ------- + int + The total number of chunks in the array. """ return product(self.cdata_shape) @@ -660,6 +692,11 @@ def nchunks(self) -> int: def nchunks_initialized(self) -> int: """ The number of chunks that have been persisted in storage. + + Returns + ------- + int + The number of initialized chunks in the array. """ return nchunks_initialized(self) @@ -816,15 +853,15 @@ async def getitem( Examples -------- >>> import zarr - >>> store = zarr.store.MemoryStore(mode='w') - >>> async_arr = await zarr.core.array.AsyncArray.create( - >>> store=store, - >>> shape=(100,100), - >>> chunks=(10,10), - >>> dtype='i4', - >>> fill_value=0) - - >>> await async_arr.getitem((0,1)) + >>> store = zarr.store.MemoryStore(mode='w') + >>> async_arr = await zarr.core.array.AsyncArray.create( + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype='i4', + ... fill_value=0) + + >>> await async_arr.getitem((0,1)) # doctest: +ELLIPSIS array(0, dtype=int32) """ From 99f896ed710455b5fbe3a7c183043cb662956747 Mon Sep 17 00:00:00 2001 From: Emma Marshall <55526386+e-marshall@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:59:54 -0600 Subject: [PATCH 3/6] fix attrs --- src/zarr/core/array.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 99344a5b0d..e66c27dff1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -606,10 +606,7 @@ def attrs(self) -> dict[str, JSON]: dict Attributes of the array """ - if self.metadata.attributes is None: - return {} - else: - return self.metadata.attributes + return self.metadata.attributes @property def read_only(self) -> bool: From a962145fb3242faa742444d2cd8ec1e6c90e9a07 Mon Sep 17 00:00:00 2001 From: Emma Marshall <55526386+e-marshall@users.noreply.github.com> Date: Tue, 1 Oct 2024 12:17:23 -0600 Subject: [PATCH 4/6] Update src/zarr/core/array.py Co-authored-by: Sanket Verma --- src/zarr/core/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e66c27dff1..d42b173dd5 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -314,7 +314,7 @@ async def create( Examples -------- >>> import zarr - >>> store = zarr.store.MemoryStore(mode='w') + >>> store = zarr.storage.MemoryStore(mode='w') >>> async_arr = await zarr.core.array.AsyncArray.create( >>> store=store, >>> shape=(100,100), From 451e767f27e50904a21749cf1b6e87ddabd47f00 Mon Sep 17 00:00:00 2001 From: Emma Marshall <55526386+e-marshall@users.noreply.github.com> Date: Tue, 1 Oct 2024 12:21:56 -0600 Subject: [PATCH 5/6] fix store -> storage --- src/zarr/core/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 3c3cb473ff..dcb6e8e32e 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -524,7 +524,7 @@ async def open( Examples -------- >>> import zarr - >>> store = zarr.store.MemoryStore(mode='w') + >>> store = zarr.storage.MemoryStore(mode='w') >>> async_arr = await AsyncArray.open(store) # doctest: +ELLIPSIS """ @@ -854,7 +854,7 @@ async def getitem( Examples -------- >>> import zarr - >>> store = zarr.store.MemoryStore(mode='w') + >>> store = zarr.storage.MemoryStore(mode='w') >>> async_arr = await zarr.core.array.AsyncArray.create( ... store=store, ... shape=(100,100), From caabff6219a734e8a7197338ff201e944a7f0bdc Mon Sep 17 00:00:00 2001 From: Emma Marshall <55526386+e-marshall@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:30:07 -0600 Subject: [PATCH 6/6] remove properties from asyncarray docstring --- src/zarr/core/array.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 80351d18a3..35dc4e0c75 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -186,23 +186,6 @@ class AsyncArray: The codec pipeline used for encoding and decoding chunks. order : {'C', 'F'} The order of the array data in memory. - - Properties - ---------- - ndim - shape - chunks - size - dtype - attrs - read_only - path - name - basename - cdata_shape - nchunks - nchunks_initialized - nbytes """ metadata: ArrayMetadata