diff --git a/docs/release.rst b/docs/release.rst index ef8a396c0f..13c2f20d2c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,6 +6,14 @@ Release notes Unreleased ---------- +Bug fixes +~~~~~~~~~ + +* Changes the default value of ``write_empty_chunks`` to ``True`` to prevent + unanticipated data losses when the data types do not have a proper default + value when empty chunks are read back in. + By :user:`Vyas Ramasubramani <vyasr>`; :issue:`965`. + .. _release_2.11.1: 2.11.1 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 906d5d9f08..53ddddb0b9 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -1309,7 +1309,7 @@ Empty chunks As of version 2.11, it is possible to configure how Zarr handles the storage of chunks that are "empty" (i.e., every element in the chunk is equal to the array's fill value). -When creating an array with ``write_empty_chunks=False`` (the default), +When creating an array with ``write_empty_chunks=False``, Zarr will check whether a chunk is empty before compression and storage. If a chunk is empty, then Zarr does not store it, and instead deletes the chunk from storage if the chunk had been previously stored. @@ -1318,7 +1318,7 @@ This optimization prevents storing redundant objects and can speed up reads, but added computation during array writes, since the contents of each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. -In this case, creating an array with ``write_empty_chunks=True`` will instruct Zarr to write every chunk without checking for emptiness. +In this case, creating an array with ``write_empty_chunks=True`` (the default) will instruct Zarr to write every chunk without checking for emptiness. 
The following example illustrates the effect of the ``write_empty_chunks`` flag on the time required to write an array with different values.:: diff --git a/zarr/core.py b/zarr/core.py index 5e2b4252aa..e1e04bb8fa 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -161,7 +161,7 @@ def __init__( cache_metadata=True, cache_attrs=True, partial_decompress=False, - write_empty_chunks=False, + write_empty_chunks=True, zarr_version=None, ): # N.B., expect at this point store is fully initialized with all diff --git a/zarr/creation.py b/zarr/creation.py index b8c40a859b..e77f26b3e2 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -74,11 +74,11 @@ def create(shape, chunks=True, dtype=None, compressor='default', .. versionadded:: 2.8 write_empty_chunks : bool, optional - If True, all chunks will be stored regardless of their contents. If - False (default), each chunk is compared to the array's fill value prior - to storing. If a chunk is uniformly equal to the fill value, then that - chunk is not be stored, and the store entry for that chunk's key is - deleted. This setting enables sparser storage, as only chunks with + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk. @@ -403,7 +403,7 @@ def open_array( chunk_store=None, storage_options=None, partial_decompress=False, - write_empty_chunks=False, + write_empty_chunks=True, *, zarr_version=None, dimension_separator=None, @@ -462,11 +462,11 @@ def open_array( is Blosc, when getting data from the array chunks will be partially read and decompressed when possible. 
write_empty_chunks : bool, optional - If True, all chunks will be stored regardless of their contents. If - False (default), each chunk is compared to the array's fill value prior - to storing. If a chunk is uniformly equal to the fill value, then that - chunk is not be stored, and the store entry for that chunk's key is - deleted. This setting enables sparser storage, as only chunks with + If True (default), all chunks will be stored regardless of their + contents. If False, each chunk is compared to the array's fill value + prior to storing. If a chunk is uniformly equal to the fill value, then + that chunk is not stored, and the store entry for that chunk's key + is deleted. This setting enables sparser storage, as only chunks with non-fill-value data are stored, at the expense of overhead associated with checking the data of each chunk.