Skip to content

Commit 81a87d6

Browse files
Use implicit fill values for zarr v2 (#2274)
* Use implicit fill values for zarr v2
1 parent 9bce890 commit 81a87d6

File tree

6 files changed

+63
-5
lines changed

6 files changed

+63
-5
lines changed

src/zarr/codecs/pipeline.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from zarr.core.common import ChunkCoords, concurrent_map
1818
from zarr.core.config import config
1919
from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
20+
from zarr.core.metadata.v2 import _default_fill_value
2021
from zarr.registry import register_pipeline
2122

2223
if TYPE_CHECKING:
@@ -247,7 +248,17 @@ async def read_batch(
247248
if chunk_array is not None:
248249
out[out_selection] = chunk_array
249250
else:
250-
out[out_selection] = chunk_spec.fill_value
251+
fill_value = chunk_spec.fill_value
252+
253+
if fill_value is None:
254+
# Zarr V2 allowed `fill_value` to be null in the metadata.
255+
# Zarr V3 requires it to be set. This has already been
256+
# validated when decoding the metadata, but we support reading
257+
# Zarr V2 data and need to support the case where fill_value
258+
# is None.
259+
fill_value = _default_fill_value(dtype=chunk_spec.dtype)
260+
261+
out[out_selection] = fill_value
251262
else:
252263
chunk_bytes_batch = await concurrent_map(
253264
[
@@ -274,7 +285,10 @@ async def read_batch(
274285
tmp = tmp.squeeze(axis=drop_axes)
275286
out[out_selection] = tmp
276287
else:
277-
out[out_selection] = chunk_spec.fill_value
288+
fill_value = chunk_spec.fill_value
289+
if fill_value is None:
290+
fill_value = _default_fill_value(dtype=chunk_spec.dtype)
291+
out[out_selection] = fill_value
278292

279293
def _merge_chunk_array(
280294
self,

src/zarr/core/array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ async def _create_v2(
380380
chunks=chunks,
381381
order=order,
382382
dimension_separator=dimension_separator,
383-
fill_value=0 if fill_value is None else fill_value,
383+
fill_value=fill_value,
384384
compressor=compressor,
385385
filters=filters,
386386
attributes=attributes,

src/zarr/core/metadata/v2.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,24 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any:
300300
raise ValueError(msg) from e
301301

302302
return fill_value
303+
304+
305+
def _default_fill_value(dtype: np.dtype[Any]) -> Any:
306+
"""
307+
Get the default fill value for a type.
308+
309+
Notes
310+
-----
311+
This differs from :func:`parse_fill_value`, which parses a fill value
312+
stored in the Array metadata into an in-memory value. This only gives
313+
the default fill value for some type.
314+
315+
This is useful for reading Zarr V2 arrays, which allow the fill
316+
value to be unspecified.
317+
"""
318+
if dtype.kind == "S":
319+
return b""
320+
elif dtype.kind == "U":
321+
return ""
322+
else:
323+
return dtype.type(0)

src/zarr/testing/strategies.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,8 @@ def arrays(
140140
)
141141

142142
assert isinstance(a, Array)
143-
assert a.fill_value is not None
143+
if a.metadata.zarr_format == 3:
144+
assert a.fill_value is not None
144145
assert isinstance(root[array_path], Array)
145146
assert nparray.shape == a.shape
146147
assert chunks == a.chunks

tests/v3/test_metadata/test_v2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def test_parse_zarr_format_invalid(data: Any) -> None:
2828
@pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
2929
@pytest.mark.parametrize("filters", [None, (), (numcodecs.GZip(),)])
3030
@pytest.mark.parametrize("compressor", [None, numcodecs.GZip()])
31-
@pytest.mark.parametrize("fill_value", [0, 1])
31+
@pytest.mark.parametrize("fill_value", [None, 0, 1])
3232
@pytest.mark.parametrize("order", ["C", "F"])
3333
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
3434
def test_metadata_to_dict(

tests/v3/test_v2.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
from collections.abc import Iterator
3+
from typing import Any
34

45
import numpy as np
56
import pytest
@@ -35,6 +36,27 @@ def test_simple(store: StorePath) -> None:
3536
assert np.array_equal(data, a[:, :])
3637

3738

39+
@pytest.mark.parametrize(
    ("dtype", "fill_value"),
    [
        ("bool", False),
        ("int64", 0),
        ("float64", 0.0),
        ("|S1", b""),
        ("|U1", ""),
        ("object", 0),
        (str, ""),
    ],
)
def test_implicit_fill_value(store: StorePath, dtype: Any, fill_value: Any) -> None:
    """
    Check that a Zarr V2 array created with ``fill_value=None`` keeps ``None``
    in its metadata while reads of unwritten data yield the expected implicit
    value for the dtype.

    ``dtype`` is annotated as ``Any`` because the parametrization passes both
    dtype strings and the builtin ``str`` type.
    """
    arr = zarr.open_array(store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype)
    # The metadata must preserve the "unspecified" fill value as-is ...
    assert arr.metadata.fill_value is None
    assert arr.metadata.to_dict()["fill_value"] is None
    # ... while reading data that was never written produces the implicit value.
    result = arr[:]
    expected = np.full(arr.shape, fill_value, dtype=dtype)
    np.testing.assert_array_equal(result, expected)
58+
59+
3860
def test_codec_pipeline() -> None:
3961
# https://github.com/zarr-developers/zarr-python/issues/2243
4062
store = MemoryStore(mode="w")

0 commit comments

Comments
 (0)