diff --git a/src/zarr/abc/metadata.py b/src/zarr/abc/metadata.py
index 7ea668c891..fdaf99da30 100644
--- a/src/zarr/abc/metadata.py
+++ b/src/zarr/abc/metadata.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import base64
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
@@ -29,6 +30,9 @@ def to_dict(self) -> dict[str, JSON]:
             value = getattr(self, key)
             if isinstance(value, Metadata):
                 out_dict[field.name] = getattr(self, field.name).to_dict()
+            elif isinstance(value, bytes):
+                # b64encode returns bytes, which is not a JSON value; store ASCII text instead
+                out_dict[key] = base64.b64encode(value).decode("ascii")
             elif isinstance(value, str):
                 out_dict[key] = value
             elif isinstance(value, Sequence):
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 10047cbb93..f1534a2457 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -313,7 +313,7 @@ def parse_fill_value(
     """
     if fill_value is None:
         return dtype.type(0)
-    if isinstance(fill_value, Sequence) and not isinstance(fill_value, str):
+    if isinstance(fill_value, Sequence) and not isinstance(fill_value, str | bytes):
         if dtype in (np.complex64, np.complex128):
             dtype = cast(COMPLEX_DTYPE, dtype)
             if len(fill_value) == 2:
diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py
index 0a545dfb9d..39365b9b11 100644
--- a/tests/v3/test_metadata/test_v3.py
+++ b/tests/v3/test_metadata/test_v3.py
@@ -1,11 +1,13 @@
 from __future__ import annotations
 
+import dataclasses
 import json
 import re
 from typing import TYPE_CHECKING, Literal
 
 from zarr.codecs.bytes import BytesCodec
 from zarr.core.buffer import default_buffer_prototype
+from zarr.core.chunk_grids import RegularChunkGrid
 from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
 from zarr.core.metadata.v3 import ArrayV3Metadata
 
@@ -165,6 +167,31 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
         parse_fill_value(fill_value, dtype)
 
 
+def test_parse_fill_value_bytes():
+    result = parse_fill_value("", dtype=np.dtype("S6"))
+    assert result == np.bytes_("")
+
+
+@pytest.mark.parametrize("fill_value", [None, np.bytes_(b"")])
+def test_fill_value_bytes(fill_value: Any) -> None:
+    md = ArrayV3Metadata(
+        shape=(4,),
+        data_type=np.dtype("S6"),
+        fill_value=fill_value,
+        chunk_grid=RegularChunkGrid(chunk_shape=(2,)),
+        chunk_key_encoding=DefaultChunkKeyEncoding(),
+        codecs=(),
+        attributes={},
+        dimension_names=("a",),
+    )
+    assert md.fill_value == np.bytes_(b"")
+    assert md.dtype == np.dtype("S6")
+    # regression test for creating a new metadata from default values
+    dataclasses.replace(md)
+    serialized = md.to_dict()
+    assert serialized
+
+
 @pytest.mark.parametrize("chunk_grid", ["regular"])
 @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}])
 @pytest.mark.parametrize("codecs", [[BytesCodec()]])