diff --git a/changes/3390.misc.rst b/changes/3390.misc.rst new file mode 100644 index 0000000000..fa229a3ad8 --- /dev/null +++ b/changes/3390.misc.rst @@ -0,0 +1 @@ +Improve documentation consistency across API functions and remove outdated references to deprecated configuration values that no longer work. \ No newline at end of file diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst index d55937f2dc..76210da791 100644 --- a/docs/user-guide/config.rst +++ b/docs/user-guide/config.rst @@ -28,7 +28,6 @@ Configuration options include the following: - Default Zarr format ``default_zarr_version`` - Default array order in memory ``array.order`` -- Default filters, serializers and compressors, e.g. ``array.v3_default_filters``, ``array.v3_default_serializer``, ``array.v3_default_compressors``, ``array.v2_default_filters`` and ``array.v2_default_compressor`` - Whether empty chunks are written to storage ``array.write_empty_chunks`` - Async and threading options, e.g. ``async.concurrency`` and ``threading.max_workers`` - Selections of implementations of codecs, codec pipelines and buffers diff --git a/pyproject.toml b/pyproject.toml index bea8d77127..9af39fb85c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ test = [ "pytest-accept", "rich", "mypy", + 'numpydoc', "hypothesis", "pytest-xdist", "packaging", diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index dcfadf6a3f..dd5a162927 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -281,7 +281,7 @@ async def load( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. path : str or None, optional The path within the store from which to load. @@ -325,7 +325,7 @@ async def open( Parameters ---------- - store : Store or str, optional + store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. 
mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means @@ -338,8 +338,8 @@ async def open( path : str or None, optional The path within the store to open. storage_options : dict - If the store is backed by an fsspec-based implementation, then this dict will be passed to - the Store constructor for that implementation. Ignored otherwise. + If using an fsspec URL to create the store, these will be passed to + the backend implementation. Ignored otherwise. **kwargs Additional parameters are passed through to :func:`zarr.creation.open_array` or :func:`zarr.hierarchy.open_group`. @@ -409,7 +409,7 @@ async def save( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. *args : ndarray NumPy arrays with data to save. @@ -445,12 +445,13 @@ async def save_array( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. arr : ndarray NumPy array with data to save. zarr_format : {2, 3, None}, optional - The zarr format to use when saving (default is 3 if not specified). + The zarr format to use when saving. The default is ``None``, which will + use the default Zarr format defined in the global configuration object. path : str or None, optional The path within the store where the array will be saved. storage_options : dict @@ -500,7 +501,7 @@ async def save_group( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. *args : ndarray NumPy arrays with data to save. @@ -649,14 +650,13 @@ async def group( Parameters ---------- - store : Store or str, optional - Store or path to directory in file system. + store : StoreLike or None, default=None + Store or path to directory in file system or name of zip file. 
overwrite : bool, optional If True, delete any pre-existing data in `store` at `path` before creating the group. - chunk_store : Store, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. + chunk_store : StoreLike or None, default=None + Separate storage for chunks. Not implemented. cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior @@ -712,8 +712,8 @@ async def create_group( Parameters ---------- - store : Store or str - Store or path to directory in file system. + store : StoreLike + Store or path to directory in file system or name of zip file. path : str, optional Group path within store. overwrite : bool, optional @@ -768,7 +768,7 @@ async def open_group( Parameters ---------- - store : Store, str, or mapping, optional + store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. Strings are interpreted as paths on the local file system @@ -793,7 +793,7 @@ async def open_group( Array synchronizer. path : str, optional Group path within store. - chunk_store : Store or str, optional + chunk_store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. storage_options : dict If using an fsspec URL to create the store, these will be passed to @@ -869,7 +869,7 @@ async def create( compressor: CompressorLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, - store: str | StoreLike | None = None, + store: StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, path: PathLike | None = None, @@ -906,65 +906,58 @@ async def create( shape : int or tuple of ints Array shape. chunks : int or tuple of ints, optional - The shape of the array's chunks. - Zarr format 2 only. Zarr format 3 arrays should use `chunk_shape` instead. 
- If not specified, default values are guessed based on the shape and dtype. + Chunk shape. If True, will be guessed from ``shape`` and ``dtype``. If + False, will be set to ``shape``, i.e., single chunk for the whole array. + If an int, the chunk size in each dimension will be given by the value + of ``chunks``. Default is True. dtype : str or dtype, optional NumPy dtype. - chunk_shape : int or tuple of ints, optional - The shape of the Array's chunks (default is None). - Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. - chunk_key_encoding : ChunkKeyEncoding, optional - A specification of how the chunk keys are represented in storage. - Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. - Default is ``("default", "/")``. - codecs : Sequence of Codecs or dicts, optional - An iterable of Codec or dict serializations of Codecs. The elements of - this collection specify the transformation from array values to stored bytes. - Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. - - If no codecs are provided, default codecs will be used: - - - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``. - - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``. - - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. - - These defaults can be changed by modifying the value of ``array.v3_default_filters``, - ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`. compressor : Codec, optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. - If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used: - - - For numeric arrays, the default is ``ZstdCodec``. - - For Unicode strings, the default is ``VLenUTF8Codec``. - - For bytes or objects, the default is ``VLenBytesCodec``. 
+ If neither ``compressor`` nor ``filters`` are provided, the default compressor + :class:`zarr.codecs.ZstdCodec` is used. - These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`. - fill_value : object - Default value to use for uninitialized portions of the array. + If ``compressor`` is set to ``None``, no compression is used. + fill_value : Any, optional + Fill value for the array. order : {'C', 'F'}, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'order': }`` to ``create`` instead of using this parameter. Memory layout to be used within each chunk. If not specified, the ``array.order`` parameter in the global config will be used. - store : Store or str + store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. synchronizer : object, optional Array synchronizer. overwrite : bool, optional - If True, delete all pre-existing data in `store` at `path` before + If True, delete all pre-existing data in ``store`` at ``path`` before creating the array. path : str, optional Path under which array is stored. - chunk_store : MutableMapping, optional - Separate storage for chunks. If not provided, `store` will be used + chunk_store : StoreLike or None, default=None + Separate storage for chunks. If not provided, ``store`` will be used for storage of both chunks and metadata. - filters : sequence of Codecs, optional - Sequence of filters to use to encode chunk data prior to compression. - Zarr format 2 only. If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`. + filters : Iterable[Codec] | Literal["auto"], optional + Iterable of filters to apply to each chunk of the array, in order, before serializing that + chunk to bytes. 
+ + For Zarr format 3, a "filter" is a codec that takes an array and returns an array, + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + + For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the + order of your filters is consistent with the behavior of each filter. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In these cases, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded @@ -981,7 +974,6 @@ async def create( dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead. - Default is ".". write_empty_chunks : bool, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'write_empty_chunks': }`` to ``create`` instead of using this parameter. @@ -991,15 +983,36 @@ async def create( that chunk is not be stored, and the store entry for that chunk's key is deleted. zarr_format : {2, 3, None}, optional - The zarr format to use when saving. - Default is 3. + The Zarr format to use when creating an array.
The default is ``None``, + which instructs Zarr to choose the default Zarr format value defined in the + runtime configuration. meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. + Not implemented. + attributes : dict[str, JSON], optional + A dictionary of user attributes to store with the array. + chunk_shape : int or tuple of ints, optional + The shape of the Array's chunks (default is None). + Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. + chunk_key_encoding : ChunkKeyEncoding, optional + A specification of how the chunk keys are represented in storage. + Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. + Default is ``("default", "/")``. + codecs : Sequence of Codecs or dicts, optional + An iterable of Codec or dict serializations of Codecs. Zarr V3 only. + + The elements of ``codecs`` specify the transformation from array values to stored bytes. + Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. + + If no codecs are provided, default codecs will be used based on the data type of the array. + For most data types, the default codecs are the tuple ``(BytesCodec(), ZstdCodec())``; + data types that require a special :class:`zarr.abc.codec.ArrayBytesCodec`, like variable-length strings or bytes, + will use the :class:`zarr.abc.codec.ArrayBytesCodec` required for the data type instead of :class:`zarr.codecs.BytesCodec`. + dimension_names : Iterable[str | None] | None = None + An iterable of dimension names. Zarr format 3 only. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. - config : ArrayConfig or ArrayConfigLike, optional + config : ArrayConfigLike, optional Runtime configuration of the array. If provided, will override the default values from `zarr.config.array`. 
@@ -1224,7 +1237,7 @@ async def open_array( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. zarr_version : {2, 3, None}, optional The zarr format to use when saving. Deprecated in favor of zarr_format. diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 1e47208dcc..d0134a4900 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -142,7 +142,7 @@ def load( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. path : str or None, optional The path within the store from which to load. @@ -182,7 +182,7 @@ def open( Parameters ---------- - store : Store or str, optional + store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means @@ -198,8 +198,8 @@ def open( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Additional parameters are passed through to :func:`zarr.api.asynchronous.open_array` or - :func:`zarr.api.asynchronous.open_group`. + Additional parameters are passed through to :func:`zarr.creation.open_array` or + :func:`zarr.hierarchy.open_group`. Returns ------- @@ -244,7 +244,7 @@ def save( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. *args : ndarray NumPy arrays with data to save. @@ -278,12 +278,13 @@ def save_array( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. arr : ndarray NumPy array with data to save. zarr_format : {2, 3, None}, optional - The zarr format to use when saving. + The zarr format to use when saving. 
The default is ``None``, which will + use the default Zarr format defined in the global configuration object. path : str or None, optional The path within the store where the array will be saved. storage_options : dict @@ -320,7 +321,7 @@ def save_group( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. *args : ndarray NumPy arrays with data to save. @@ -411,14 +412,13 @@ def group( Parameters ---------- - store : Store or str, optional - Store or path to directory in file system. + store : StoreLike or None, default=None + Store or path to directory in file system or name of zip file. overwrite : bool, optional If True, delete any pre-existing data in `store` at `path` before creating the group. - chunk_store : Store, optional - Separate storage for chunks. If not provided, `store` will be used - for storage of both chunks and metadata. + chunk_store : StoreLike or None, default=None + Separate storage for chunks. Not implemented. cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior @@ -479,7 +479,7 @@ def open_group( Parameters ---------- - store : Store, str, or mapping, optional + store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. Strings are interpreted as paths on the local file system @@ -504,7 +504,7 @@ def open_group( Array synchronizer. path : str, optional Group path within store. - chunk_store : Store or str, optional + chunk_store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. storage_options : dict If using an fsspec URL to create the store, these will be passed to @@ -527,7 +527,7 @@ def open_group( To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, which will fall back to using the regular, non consolidated metadata. 
- Zarr format 2 allows configuring the key storing the consolidated metadata + Zarr format 2 allowed configuring the key storing the consolidated metadata (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` to load consolidated metadata from a non-default key. @@ -569,8 +569,8 @@ def create_group( Parameters ---------- - store : Store or str - Store or path to directory in file system. + store : StoreLike + Store or path to directory in file system or name of zip file. path : str, optional Group path within store. overwrite : bool, optional @@ -613,7 +613,7 @@ def create( compressor: CompressorLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type order: MemoryOrder | None = None, - store: str | StoreLike | None = None, + store: StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, path: PathLike | None = None, @@ -650,35 +650,58 @@ def create( shape : int or tuple of ints Array shape. chunks : int or tuple of ints, optional - Chunk shape. If True, will be guessed from `shape` and `dtype`. If - False, will be set to `shape`, i.e., single chunk for the whole array. + Chunk shape. If True, will be guessed from ``shape`` and ``dtype``. If + False, will be set to ``shape``, i.e., single chunk for the whole array. If an int, the chunk size in each dimension will be given by the value - of `chunks`. Default is True. + of ``chunks``. Default is True. dtype : str or dtype, optional NumPy dtype. compressor : Codec, optional - Primary compressor. - fill_value : object - Default value to use for uninitialized portions of the array. + Primary compressor to compress chunk data. + Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. + + If neither ``compressor`` nor ``filters`` are provided, the default compressor + :class:`zarr.codecs.ZstdCodec` is used. + + If ``compressor`` is set to ``None``, no compression is used. + fill_value : Any, optional + Fill value for the array. 
order : {'C', 'F'}, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'order': }`` to ``create`` instead of using this parameter. Memory layout to be used within each chunk. If not specified, the ``array.order`` parameter in the global config will be used. - store : Store or str + store : StoreLike or None, default=None Store or path to directory in file system or name of zip file. synchronizer : object, optional Array synchronizer. overwrite : bool, optional - If True, delete all pre-existing data in `store` at `path` before + If True, delete all pre-existing data in ``store`` at ``path`` before creating the array. path : str, optional Path under which array is stored. - chunk_store : MutableMapping, optional - Separate storage for chunks. If not provided, `store` will be used + chunk_store : StoreLike or None, default=None + Separate storage for chunks. If not provided, ``store`` will be used for storage of both chunks and metadata. - filters : sequence of Codecs, optional - Sequence of filters to use to encode chunk data prior to compression. + filters : Iterable[Codec] | Literal["auto"], optional + Iterable of filters to apply to each chunk of the array, in order, before serializing that + chunk to bytes. + + For Zarr format 3, a "filter" is a codec that takes an array and returns an array, + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + + For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the + order of your filters is consistent with the behavior of each filter. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty.
The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In these cases, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded @@ -694,6 +717,7 @@ def create( A codec to encode object arrays, only needed if dtype=object. dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. + Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead. write_empty_chunks : bool, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'write_empty_chunks': }`` to ``create`` instead of using this parameter. @@ -703,10 +727,32 @@ def create( that chunk is not be stored, and the store entry for that chunk's key is deleted. zarr_format : {2, 3, None}, optional - The zarr format to use when saving. + The Zarr format to use when creating an array. The default is ``None``, + which instructs Zarr to choose the default Zarr format value defined in the + runtime configuration. meta_array : array-like, optional - An array instance to use for determining arrays to create and return - to users. Use `numpy.empty(())` by default. + Not implemented. + attributes : dict[str, JSON], optional + A dictionary of user attributes to store with the array. + chunk_shape : int or tuple of ints, optional + The shape of the Array's chunks (default is None). + Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. + chunk_key_encoding : ChunkKeyEncoding, optional + A specification of how the chunk keys are represented in storage. + Zarr format 3 only.
Zarr format 2 arrays should use `dimension_separator` instead. + Default is ``("default", "/")``. + codecs : Sequence of Codecs or dicts, optional + An iterable of Codec or dict serializations of Codecs. Zarr V3 only. + + The elements of ``codecs`` specify the transformation from array values to stored bytes. + Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. + + If no codecs are provided, default codecs will be used based on the data type of the array. + For most data types, the default codecs are the tuple ``(BytesCodec(), ZstdCodec())``; + data types that require a special :class:`zarr.abc.codec.ArrayBytesCodec`, like variable-length strings or bytes, + will use the :class:`zarr.abc.codec.ArrayBytesCodec` required for the data type instead of :class:`zarr.codecs.BytesCodec`. + dimension_names : Iterable[str | None] | None = None + An iterable of dimension names. Zarr format 3 only. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. @@ -757,7 +803,7 @@ def create( def create_array( - store: str | StoreLike, + store: StoreLike, *, name: str | None = None, shape: ShapeLike | None = None, @@ -785,41 +831,42 @@ def create_array( Parameters ---------- - store : str or Store + store : StoreLike Store or path to directory in file system or name of zip file. name : str or None, optional The name of the array within the store. If ``name`` is ``None``, the array will be located at the root of the store. shape : ShapeLike, optional Shape of the array. Must be ``None`` if ``data`` is provided. - dtype : ZDTypeLike, optional + dtype : ZDTypeLike | None Data type of the array. Must be ``None`` if ``data`` is provided. data : np.ndarray, optional Array-like data to use for initializing the array. If this parameter is provided, the ``shape`` and ``dtype`` parameters must be ``None``. - chunks : tuple[int, ...], optional + chunks : tuple[int, ...] 
| Literal["auto"], default="auto" Chunk shape of the array. - If not specified, default are guessed based on the shape and dtype. + If chunks is "auto", a chunk shape is guessed based on the shape of the array and the dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. - filters : Iterable[Codec], optional + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`.
In these cases, + the default filters contains a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -856,7 +903,7 @@ def create_array( The zarr format to use when saving. attributes : dict, optional Attributes for the array. - chunk_key_encoding : ChunkKeyEncoding, optional + chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. @@ -868,7 +915,7 @@ def create_array( Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. - If `True`, all existing paths in the store will be deleted. + If ``True``, all existing paths in the store will be deleted. config : ArrayConfigLike, optional Runtime configuration for the array. write_data : bool @@ -923,7 +970,7 @@ def create_array( def from_array( - store: str | StoreLike, + store: StoreLike, *, data: Array | npt.ArrayLike, write_data: bool = True, @@ -947,8 +994,8 @@ def from_array( Parameters ---------- - store : str or Store - Store or path to directory in file system or name of zip file for the new array. + store : StoreLike + Store or path to directory in file system or name of zip file. data : Array | array-like The array to copy. write_data : bool, default True @@ -977,24 +1024,27 @@ def from_array( - None: No sharding. If not specified, defaults to "keep" if data is a zarr Array, otherwise None. 
- filters : Iterable[Codec] or "auto" or "keep", optional + filters : Iterable[Codec] | Literal["auto", "keep"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - Following values are supported: - - - Iterable[Codec]: List of filters to apply to the array. - - "auto": Automatically determine the filters based on the array's dtype. - - "keep": Retain the filters of the data array if it is a zarr Array. + The default value of ``"keep"`` instructs Zarr to infer ``filters`` from ``data``. + If that inference is not possible, Zarr will fall back to the behavior specified by ``"auto"``, + which is to choose default filters based on the data type of the array and the Zarr format specified. + For all data types in Zarr V3, and most data types in Zarr V2, the default filters are the empty tuple ``()``. + The only case where default filters are not empty is when the Zarr format is 2, and the + data type is a variable-length data type like :class:`zarr.dtype.VariableLengthUTF8` or + :class:`zarr.dtype.VariableLengthBytes`. In these cases, the default filters are a tuple with a + single element which is a codec specific to that particular data type. - If no ``filters`` are provided, defaults to "keep" if data is a zarr Array, otherwise "auto". + To create an array with no filters, provide an empty iterable or the value ``None``.
compressors : Iterable[Codec] or "auto" or "keep", optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -1046,7 +1096,7 @@ def from_array( For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. If not specified and the data array has the same zarr format as the target array, the chunk key encoding of the data array is used. - dimension_names : Iterable[str], optional + dimension_names : Iterable[str | None] | None The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. If not specified, defaults to the dimension names of the data array. @@ -1272,6 +1322,7 @@ def open_array( store: StoreLike | None = None, *, zarr_version: ZarrFormat | None = None, + zarr_format: ZarrFormat | None = None, path: PathLike = "", storage_options: dict[str, Any] | None = None, **kwargs: Any, @@ -1280,9 +1331,11 @@ def open_array( Parameters ---------- - store : Store or str + store : StoreLike Store or path to directory in file system or name of zip file. zarr_version : {2, 3, None}, optional + The zarr format to use when saving. Deprecated in favor of zarr_format. + zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str, optional Path in store to array. @@ -1290,7 +1343,8 @@ def open_array( If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs - Any keyword arguments to pass to ``create``. + Any keyword arguments to pass to :func:`create`. 
+ Returns ------- @@ -1302,6 +1356,7 @@ def open_array( async_api.open_array( store=store, zarr_version=zarr_version, + zarr_format=zarr_format, path=path, storage_options=storage_options, **kwargs, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f31b0cc0a4..960b322a25 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -64,7 +64,6 @@ parse_shapelike, product, ) -from zarr.core.config import categorize_data_type from zarr.core.config import config as zarr_config from zarr.core.dtype import ( VariableLengthBytes, @@ -524,12 +523,6 @@ async def create( If no codecs are provided, default codecs will be used: - - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``. - - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``. - - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. - - These defaults can be changed by modifying the value of ``array.v3_default_filters``, - ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`. dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. @@ -546,11 +539,25 @@ async def create( If `zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. - filters : list[dict[str, JSON]], optional - Sequence of filters to use to encode chunk data prior to compression. - Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``filters`` - are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`. 
+ filters : Iterable[Codec] | Literal["auto"], optional + Iterable of filters to apply to each chunk of the array, in order, before serializing that + chunk to bytes. + + For Zarr format 3, a "filter" is a codec that takes an array and returns an array, + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + + For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the + order of your filters is consistent with the behavior of each filter. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In this case, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. compressor : dict[str, JSON], optional The compressor used to compress the data (default is None). Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. @@ -2067,9 +2074,6 @@ def create( - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``. - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``. - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. - - These defaults can be changed by modifying the value of ``array.v3_default_filters``, - ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`. dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. 
Zarr format 2 arrays should not use this parameter. @@ -2086,11 +2090,25 @@ def create( If `zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``. - filters : list[dict[str, JSON]], optional - Sequence of filters to use to encode chunk data prior to compression. - Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``filters`` - are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`. + filters : Iterable[Codec] | Literal["auto"], optional + Iterable of filters to apply to each chunk of the array, in order, before serializing that + chunk to bytes. + + For Zarr format 3, a "filter" is a codec that takes an array and returns an array, + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. + + For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the + order of your filters is consistent with the behavior of each filter. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In this case, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. 
compressor : dict[str, JSON], optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. @@ -2232,7 +2250,7 @@ def open( Parameters ---------- - store : Store + store : StoreLike Store containing the Array. Returns @@ -4164,7 +4182,7 @@ class ShardsConfigParam(TypedDict): async def from_array( - store: str | StoreLike, + store: StoreLike, *, data: Array | npt.ArrayLike, write_data: bool = True, @@ -4182,14 +4200,14 @@ async def from_array( dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: """Create an array from an existing array or array-like. Parameters ---------- - store : str or Store - Store or path to directory in file system or name of zip file for the new array. + store : StoreLike + Store or path to directory in file system or name of zip file. data : Array | array-like The array to copy. write_data : bool, default True @@ -4218,24 +4236,27 @@ async def from_array( - None: No sharding. If not specified, defaults to "keep" if data is a zarr Array, otherwise None. - filters : Iterable[Codec] or "auto" or "keep", optional + filters : Iterable[Codec] | Literal["auto", "keep"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. 
For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - Following values are supported: - - - Iterable[Codec]: List of filters to apply to the array. - - "auto": Automatically determine the filters based on the array's dtype. - - "keep": Retain the filters of the data array if it is a zarr Array. + The default value of ``"keep"`` instructs Zarr to infer ``filters`` from ``data``. + If that inference is not possible, Zarr will fall back to the behavior specified by ``"auto"``, + which is to choose default filters based on the data type of the array and the Zarr format specified. + For all data types in Zarr V3, and most data types in Zarr V2, the default filters are the empty tuple ``()``. + The only cases where default filters are not empty is when the Zarr format is 2, and the + data type is a variable-length data type like :class:`zarr.dtype.VariableLengthUTF8` or + :class:`zarr.dtype.VariableLengthUTF8`. In these cases, the default filters is a tuple with a + single element which is a codec specific to that particular data type. - If no ``filters`` are provided, defaults to "keep" if data is a zarr Array, otherwise "auto". + To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec] or "auto" or "keep", optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -4287,7 +4308,7 @@ async def from_array( For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. If not specified and the data array has the same zarr format as the target array, the chunk key encoding of the data array is used. - dimension_names : Iterable[str | None], optional + dimension_names : Iterable[str | None] | None The names of the dimensions (default is None). Zarr format 3 only. 
Zarr format 2 arrays should not use this parameter. If not specified, defaults to the dimension names of the data array. @@ -4462,46 +4483,40 @@ async def init_array( If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. - filters : Iterable[Codec], optional + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. - compressors : Iterable[Codec], optional + + The default value of ``"auto"`` instructs Zarr to use a default used based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. 
The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In this case, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. + compressors : Iterable[Codec] | Literal["auto"], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. - For Zarr format 3, a "compressor" is a codec that takes a bytestream, and - returns another bytestream. Multiple compressors my be provided for Zarr format 3. - If no ``compressors`` are provided, a default set of compressors will be used. - These defaults can be changed by modifying the value of ``array.v3_default_compressors`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default compressors. + The default value of ``"auto"`` instructs Zarr to use a default of :class:`zarr.codecs.ZstdCodec`. - For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may - be provided for Zarr format 2. - If no ``compressor`` is provided, a default compressor will be used. - in :mod:`zarr.core.config`. - Use ``None`` to omit the default compressor. - serializer : dict[str, JSON] | ArrayBytesCodec, optional + To create an array with no compressors, provide an empty iterable or the value ``None``. + serializer : dict[str, JSON] | ArrayBytesCodec | Literal["auto"], optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. - If no ``serializer`` is provided, a default serializer will be used. 
- These defaults can be changed by modifying the value of ``array.v3_default_serializer`` - in :mod:`zarr.core.config`. + + The default value of ``"auto"`` instructs Zarr to use a default codec based on the data type of the array. + For most data types this default codec is :class:`zarr.codecs.BytesCodec`. + For :class:`zarr.dtype.VariableLengthUTF8`, the default codec is :class:`zarr.codecs.VLenUTF8Codec`. + For :class:`zarr.dtype.VariableLengthBytes`, the default codec is :class:`zarr.codecs.VLenBytesCodec`. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional @@ -4648,7 +4663,7 @@ async def init_array( async def create_array( - store: str | StoreLike, + store: StoreLike, *, name: str | None = None, shape: ShapeLike | None = None, @@ -4674,41 +4689,42 @@ async def create_array( Parameters ---------- - store : str or Store + store : StoreLike Store or path to directory in file system or name of zip file. name : str or None, optional The name of the array within the store. If ``name`` is ``None``, the array will be located at the root of the store. - shape : tuple[int, ...], optional - Shape of the array. Can be ``None`` if ``data`` is provided. + shape : ShapeLike, optional + Shape of the array. Must be ``None`` if ``data`` is provided. dtype : ZDTypeLike | None - Data type of the array. Can be ``None`` if ``data`` is provided. - data : Array-like data to use for initializing the array. If this parameter is provided, the - ``shape`` and ``dtype`` parameters must be identical to ``data.shape`` and ``data.dtype``, - or ``None``. - chunks : tuple[int, ...], optional + Data type of the array. Must be ``None`` if ``data`` is provided. + data : np.ndarray, optional + Array-like data to use for initializing the array. If this parameter is provided, the + ``shape`` and ``dtype`` parameters must be ``None``. + chunks : tuple[int, ...] | Literal["auto"], default="auto" Chunk shape of the array. 
- If not specified, default are guessed based on the shape and dtype. + If chunks is "auto", a chunk shape is guessed based on the shape of the array and the dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. - filters : Iterable[Codec], optional + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + + The default value of ``"auto"`` instructs Zarr to use a default used based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only cases where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. 
In these cases, + the default filters contains a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -4757,6 +4773,7 @@ async def create_array( Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. + If ``True``, all existing paths in the store will be deleted. config : ArrayConfigLike, optional Runtime configuration for the array. write_data : bool @@ -4950,26 +4967,6 @@ def _parse_chunk_key_encoding( return result -def _get_default_chunk_encoding_v3( - dtype: ZDType[TBaseDType, TBaseScalar], -) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: - """ - Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. - """ - - dtype_category = categorize_data_type(dtype) - - filters = zarr_config.get("array.v3_default_filters").get(dtype_category) - compressors = zarr_config.get("array.v3_default_compressors").get(dtype_category) - serializer = zarr_config.get("array.v3_default_serializer").get(dtype_category) - - return ( - tuple(_parse_array_array_codec(f) for f in filters), - _parse_array_bytes_codec(serializer), - tuple(_parse_bytes_bytes_codec(c) for c in compressors), - ) - - def default_filters_v3(dtype: ZDType[Any, Any]) -> tuple[ArrayArrayCodec, ...]: """ Given a data type, return the default filters for that data type. 
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 85d83713e4..f5bb14c48e 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -74,7 +74,7 @@ ) from typing import Any - from zarr.core.array_spec import ArrayConfig, ArrayConfigLike + from zarr.core.array_spec import ArrayConfigLike from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_key_encodings import ChunkKeyEncodingLike from zarr.core.common import MemoryOrder @@ -1062,24 +1062,25 @@ async def create_array( If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. - filters : Iterable[Codec], optional + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. 
+ + The default value of ``"auto"`` instructs Zarr to use a default used based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only cases where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + the default filters contains a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -2448,9 +2449,149 @@ def require_groups(self, *names: str) -> tuple[Group, ...]: """ return tuple(map(Group, self._sync(self._async_group.require_groups(*names)))) - def create(self, *args: Any, **kwargs: Any) -> Array: - # Backwards compatibility for 2.x - return self.create_array(*args, **kwargs) + def create( + self, + name: str, + *, + shape: ShapeLike | None = None, + dtype: ZDTypeLike | None = None, + data: np.ndarray[Any, np.dtype[Any]] | None = None, + chunks: tuple[int, ...] 
| Literal["auto"] = "auto", + shards: ShardsLike | None = None, + filters: FiltersLike = "auto", + compressors: CompressorsLike = "auto", + compressor: CompressorLike = "auto", + serializer: SerializerLike = "auto", + fill_value: Any | None = DEFAULT_FILL_VALUE, + order: MemoryOrder | None = None, + attributes: dict[str, JSON] | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, + dimension_names: DimensionNames = None, + storage_options: dict[str, Any] | None = None, + overwrite: bool = False, + config: ArrayConfigLike | None = None, + write_data: bool = True, + ) -> Array: + """Create an array within this group. + + This method lightly wraps :func:`zarr.core.array.create_array`. + + Parameters + ---------- + name : str + The name of the array relative to the group. If ``path`` is ``None``, the array will be located + at the root of the store. + shape : ShapeLike, optional + Shape of the array. Must be ``None`` if ``data`` is provided. + dtype : ZDTypeLike | None + Data type of the array. Must be ``None`` if ``data`` is provided. + data : np.ndarray, optional + Array-like data to use for initializing the array. If this parameter is provided, the ``shape`` and ``dtype`` parameters must be ``None``. + chunks : tuple[int, ...] | Literal["auto"], default="auto" + Chunk shape of the array. + If chunks is "auto", a chunk shape is guessed based on the shape of the array and the dtype. + shards : tuple[int, ...], optional + Shard shape of the array. The default value of ``None`` results in no sharding at all. + filters : Iterable[Codec] | Literal["auto"], optional + Iterable of filters to apply to each chunk of the array, in order, before serializing that + chunk to bytes. + + For Zarr format 3, a "filter" is a codec that takes an array and returns an array, + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. 
+ + For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the + order of your filters is consistent with the behavior of each filter. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In this case, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. + compressors : Iterable[Codec], optional + List of compressors to apply to the array. Compressors are applied in order, and after any + filters are applied (if any are specified) and the data is serialized into bytes. + + For Zarr format 3, a "compressor" is a codec that takes a bytestream, and + returns another bytestream. Multiple compressors may be provided for Zarr format 3. + If no ``compressors`` are provided, a default set of compressors will be used. + The default value of ``"auto"`` instructs Zarr to use a default of + :class:`zarr.codecs.ZstdCodec`. + Use ``None`` to omit default compressors. + + For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may + be provided for Zarr format 2. + If no ``compressor`` is provided, a default compressor + will be used. + Use ``None`` to omit the default compressor. + compressor : Codec, optional + Deprecated in favor of ``compressors``. + serializer : dict[str, JSON] | ArrayBytesCodec, optional + Array-to-bytes codec to use for encoding the array data. + Zarr format 3 only. 
Zarr format 2 arrays use implicit array-to-bytes conversion. + If no ``serializer`` is provided, a default serializer will be used. + The default value of ``"auto"`` instructs Zarr to use a default codec based on the + data type of the array. + fill_value : Any, optional + Fill value for the array. + order : {"C", "F"}, optional + The memory order of the array (default is "C"). + For Zarr format 2, this parameter sets the memory order of the array. + For Zarr format 3, this parameter is deprecated, because memory order + is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory + order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``. + If no ``order`` is provided, a default order will be used. + This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`. + attributes : dict, optional + Attributes for the array. + chunk_key_encoding : ChunkKeyEncoding, optional + A specification of how the chunk keys are represented in storage. + For Zarr format 3, the default is ``{"name": "default", "separator": "/"}``. + For Zarr format 2, the default is ``{"name": "v2", "separator": "."}``. + dimension_names : Iterable[str | None] | None + The names of the dimensions (default is None). + Zarr format 3 only. Zarr format 2 arrays should not use this parameter. + storage_options : dict, optional + If using an fsspec URL to create the store, these will be passed to the backend implementation. + Ignored otherwise. + overwrite : bool, default False + Whether to overwrite an array with the same name in the store, if one exists. + config : ArrayConfigLike, optional + Runtime configuration for the array. + write_data : bool + If a pre-existing array-like object was provided to this function via the ``data`` parameter + then ``write_data`` determines whether the values in that array-like object should be + written to the Zarr array created by this function. 
If ``write_data`` is ``False``, then the + array will be left empty. + + Returns + ------- + Array + """ + return self.create_array( + name, + shape=shape, + dtype=dtype, + data=data, + chunks=chunks, + shards=shards, + filters=filters, + compressors=compressors, + compressor=compressor, + serializer=serializer, + fill_value=fill_value, + order=order, + attributes=attributes, + chunk_key_encoding=chunk_key_encoding, + dimension_names=dimension_names, + storage_options=storage_options, + overwrite=overwrite, + config=config, + write_data=write_data, + ) def create_array( self, @@ -2495,24 +2636,25 @@ def create_array( If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. - filters : Iterable[Codec], optional + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or a + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. 
+ + The default value of ``"auto"`` instructs Zarr to use a default used based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only cases where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthUTF8`. In these cases, + the default filters contains a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. @@ -2867,7 +3009,7 @@ def array( dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, - config: ArrayConfig | ArrayConfigLike | None = None, + config: ArrayConfigLike | None = None, data: npt.ArrayLike | None = None, ) -> Array: """Create an array within this group. @@ -2891,24 +3033,25 @@ def array( If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. - filters : Iterable[Codec], optional + filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, - and these values must be instances of ``ArrayArrayCodec``, or dict representations - of ``ArrayArrayCodec``. - If no ``filters`` are provided, a default set of filters will be used. 
- These defaults can be changed by modifying the value of ``array.v3_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + and these values must be instances of :class:`zarr.abc.codec.ArrayArrayCodec`, or + dict representations of :class:`zarr.abc.codec.ArrayArrayCodec`. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. - If no ``filters`` are provided, a default set of filters will be used. - These defaults can be changed by modifying the value of ``array.v2_default_filters`` - in :mod:`zarr.core.config`. - Use ``None`` to omit default filters. + + The default value of ``"auto"`` instructs Zarr to use a default based on the data + type of the array and the Zarr format specified. For all data types in Zarr V3, and most + data types in Zarr V2, the default filters are empty. The only case where default filters + are not empty is when the Zarr format is 2, and the data type is a variable-length data type like + :class:`zarr.dtype.VariableLengthUTF8` or :class:`zarr.dtype.VariableLengthBytes`. In this case, + the default filters contain a single element which is a codec specific to that particular data type. + + To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. diff --git a/src/zarr/storage/_common.py b/src/zarr/storage/_common.py index 90f0b28870..5df2c11c5c 100644 --- a/src/zarr/storage/_common.py +++ b/src/zarr/storage/_common.py @@ -294,7 +294,7 @@ async def make_store_path( Parameters ---------- - store_like : StoreLike | None + store_like : StoreLike or None, default=None The object to convert to a `StorePath` object. 
path : str | None, optional The path to use when creating the `StorePath` object. If None, the
diff --git a/tests/test_api/test_synchronous.py b/tests/test_api/test_synchronous.py
new file mode 100644
index 0000000000..d6ae61f1ca
--- /dev/null
+++ b/tests/test_api/test_synchronous.py
@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Final
+
+import pytest
+from numpydoc.docscrape import NumpyDocString
+
+import zarr
+from zarr.api import asynchronous, synchronous
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+# Intersect (not union) the two export lists: a name exported by only one API cannot be
+# compared, and ``getattr`` on the other module would fail while tests are being collected.
+MATCHED_EXPORT_NAMES: Final[tuple[str, ...]] = tuple(
+    sorted(set(synchronous.__all__) & set(asynchronous.__all__))
+)
+"""A sorted tuple of names that are exported by both the sync and async APIs."""
+
+MATCHED_CALLABLE_NAMES: Final[tuple[str, ...]] = tuple(
+    x for x in MATCHED_EXPORT_NAMES if callable(getattr(synchronous, x))
+)
+"""A sorted tuple of callable names that are exported by both the sync and async APIs."""
+
+
+@pytest.mark.parametrize("callable_name", MATCHED_CALLABLE_NAMES)
+def test_docstrings_match(callable_name: str) -> None:
+    """
+    Tests that the docstrings for the sync and async define identical parameters.
+
+    Parameters are compared pairwise in documented order. If one docstring documents more
+    parameters than the other, the unpaired trailing entries are not compared.
+    """
+    callable_a = getattr(synchronous, callable_name)
+    callable_b = getattr(asynchronous, callable_name)
+    if callable_a.__doc__ is None:
+        assert callable_b.__doc__ is None
+    else:
+        params_a = NumpyDocString(callable_a.__doc__)["Parameters"]
+        params_b = NumpyDocString(callable_b.__doc__)["Parameters"]
+        mismatch = [
+            (idx, (a, b))
+            for idx, (a, b) in enumerate(zip(params_a, params_b, strict=False))
+            if a != b
+        ]
+        assert mismatch == []
+
+
+@pytest.mark.parametrize(
+    ("parameter_name", "array_creation_routines"),
+    [
+        (
+            ("store", "path"),
+            (
+                asynchronous.create_array,
+                synchronous.create_array,
+                asynchronous.create_group,
+                synchronous.create_group,
+                zarr.AsyncGroup.create_array,
+                zarr.Group.create_array,
+            ),
+        ),
+        (
+            (
+                "store",
+                "path",
+            ),
+            (
+                asynchronous.create,
+                synchronous.create,
+                zarr.Group.create,
+                zarr.AsyncArray.create,
+                zarr.Array.create,
+            ),
+        ),
+        (
+            (
+                "filters",
+                "codecs",
+                "compressors",
+                "compressor",
+                "chunks",
+                "shape",
+                "dtype",
+                "shards",
+                "fill_value",
+            ),
+            (
+                asynchronous.create,
+                synchronous.create,
+                asynchronous.create_array,
+                synchronous.create_array,
+                zarr.AsyncGroup.create_array,
+                zarr.Group.create_array,
+                zarr.AsyncGroup.create_dataset,
+                zarr.Group.create_dataset,
+            ),
+        ),
+    ],
+    ids=str,
+)
+def test_docstring_consistent_parameters(
+    parameter_name: tuple[str, ...], array_creation_routines: tuple[Callable[[Any], Any], ...]
+) -> None:
+    """
+    Tests that array and group creation routines document the same parameters consistently.
+
+    ``parameter_name`` is a tuple of parameter names. For each name, this test inspects the
+    docstrings of the given callables and generates two dicts:
+
+    - a dict where the keys are parameter descriptions and the values are the names of the
+      routines with those descriptions
+    - a dict where the keys are parameter types and the values are the names of the routines
+      with those types
+
+    If each dict has at most 1 key, then the parameter description and type in the docstring
+    are identical across the routines. But if either dict has multiple keys, then there must be
+    routines that use the same parameter but document it differently, which will trigger a test
+    failure. Routines that do not document a given parameter are ignored for that parameter.
+    """
+    # Parse each docstring once rather than once per parameter name.
+    documented = {
+        f"{routine.__module__}.{routine.__qualname__}": {
+            param.name: param for param in NumpyDocString(routine.__doc__)["Parameters"]
+        }
+        for routine in array_creation_routines
+    }
+    problems: list[str] = []
+    for name in parameter_name:
+        descs: dict[tuple[str, ...], tuple[str, ...]] = {}
+        types: dict[str, tuple[str, ...]] = {}
+        for key, param_dict in documented.items():
+            if name not in param_dict:
+                continue
+            val = param_dict[name]
+            descs[tuple(val.desc)] = descs.get(tuple(val.desc), ()) + (key,)
+            types[val.type] = types.get(val.type, ()) + (key,)
+        if len(descs) > 1:
+            problems.append(f"{name!r} is described in {len(descs)} different ways: {descs}")
+        if len(types) > 1:
+            problems.append(f"{name!r} is documented with {len(types)} different types: {types}")
+    assert problems == []