diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index fac0facd7d..fee3169e29 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -82,7 +82,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
         return data
     elif isinstance(data, dict):
         if data["zarr_format"] == 3:
-            return ArrayV3Metadata.from_dict(data)
+            meta_out = ArrayV3Metadata.from_dict(data)
+            if len(meta_out.storage_transformers) > 0:
+                msg = (
+                    f"Array metadata contains storage transformers: {meta_out.storage_transformers}. "
+                    "Arrays with storage transformers are not supported in zarr-python at this time."
+                )
+                raise ValueError(msg)
+            return meta_out
         elif data["zarr_format"] == 2:
             return ArrayV2Metadata.from_dict(data)
     raise TypeError
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index 8681c8250e..f0c6dc6282 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -72,6 +72,23 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None:
         raise TypeError(msg)
 
 
+def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
+    """
+    Parse storage_transformers. Zarr python cannot use storage transformers
+    at this time, so this function doesn't attempt to validate them.
+    """
+    if data is None:
+        return ()
+    if isinstance(data, Iterable):
+        # Normalize any iterable (list, generator, ...) to a tuple; an empty
+        # iterable naturally becomes (). Returning `data` unchanged here would
+        # leak a non-tuple, or an already-exhausted generator, to the caller.
+        return tuple(data)
+    raise TypeError(
+        f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead."
+    )
+
+
 class V3JsonEncoder(json.JSONEncoder):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.indent = kwargs.pop("indent", config.get("json_indent"))
@@ -144,6 +161,7 @@ class ArrayV3Metadata(ArrayMetadata):
     dimension_names: tuple[str, ...] | None = None
     zarr_format: Literal[3] = field(default=3, init=False)
     node_type: Literal["array"] = field(default="array", init=False)
+    storage_transformers: tuple[dict[str, JSON], ...]
 
     def __init__(
         self,
@@ -156,6 +174,7 @@ def __init__(
         codecs: Iterable[Codec | dict[str, JSON]],
         attributes: None | dict[str, JSON],
         dimension_names: None | Iterable[str],
+        storage_transformers: None | Iterable[dict[str, JSON]] = None,
     ) -> None:
         """
         Because the class is a frozen dataclass, we set attributes using object.__setattr__
@@ -168,6 +187,7 @@ def __init__(
         fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
         attributes_parsed = parse_attributes(attributes)
         codecs_parsed_partial = parse_codecs(codecs)
+        storage_transformers_parsed = parse_storage_transformers(storage_transformers)
 
         array_spec = ArraySpec(
             shape=shape_parsed,
@@ -186,6 +206,7 @@ def __init__(
         object.__setattr__(self, "dimension_names", dimension_names_parsed)
         object.__setattr__(self, "fill_value", fill_value_parsed)
         object.__setattr__(self, "attributes", attributes_parsed)
+        object.__setattr__(self, "storage_transformers", storage_transformers_parsed)
 
         self._validate_metadata()
 
diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py
index 95bbde1740..6224bc39e3 100644
--- a/tests/v3/test_array.py
+++ b/tests/v3/test_array.py
@@ -6,9 +6,10 @@
 import pytest
 
 from zarr import Array, AsyncArray, Group
+from zarr.codecs.bytes import BytesCodec
 from zarr.core.array import chunks_initialized
 from zarr.core.buffer.cpu import NDBuffer
-from zarr.core.common import ZarrFormat
+from zarr.core.common import JSON, ZarrFormat
 from zarr.core.indexing import ceildiv
 from zarr.core.sync import sync
 from zarr.errors import ContainsArrayError, ContainsGroupError
@@ -238,6 +239,27 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> None:
     np.testing.assert_array_equal(actual[:], expected[:])
 
 
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+def test_storage_transformers(store: MemoryStore) -> None:
+    """
+    Test that providing an actual storage transformer raises an error instead of passing through
+    """
+    metadata_dict: dict[str, JSON] = {
+        "zarr_format": 3,
+        "node_type": "array",
+        "shape": (10,),
+        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
+        "data_type": "uint8",
+        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
+        "codecs": (BytesCodec().to_dict(),),
+        "fill_value": 0,
+        "storage_transformers": ({"test": "should_raise"},),
+    }
+    match = "Arrays with storage transformers are not supported in zarr-python at this time."
+    with pytest.raises(ValueError, match=match):
+        Array.from_dict(StorePath(store), data=metadata_dict)
+
+
 @pytest.mark.parametrize("test_cls", [Array, AsyncArray])
 @pytest.mark.parametrize("nchunks", [2, 5, 10])
 def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None:
diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py
index 025d59422a..71dc917c35 100644
--- a/tests/v3/test_metadata/test_v3.py
+++ b/tests/v3/test_metadata/test_v3.py
@@ -14,6 +14,7 @@
     from typing import Any
 
     from zarr.abc.codec import Codec
+    from zarr.core.common import JSON
 
 import numpy as np
 
@@ -196,6 +197,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
 @pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
 @pytest.mark.parametrize("dimension_separator", [".", "/", None])
 @pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
+@pytest.mark.parametrize("storage_transformers", [None, ()])
 def test_metadata_to_dict(
     chunk_grid: str,
     codecs: list[Codec],
@@ -204,6 +206,7 @@ def test_metadata_to_dict(
     dimension_separator: Literal[".", "/"] | None,
     dimension_names: Literal["nones", "strings", "missing"],
     attributes: None | dict[str, Any],
+    storage_transformers: None | tuple[dict[str, JSON], ...],
 ) -> None:
     shape = (1, 2, 3)
     data_type = "uint8"
@@ -234,6 +237,7 @@ def test_metadata_to_dict(
         "chunk_key_encoding": cke,
         "codecs": tuple(c.to_dict() for c in codecs),
         "fill_value": fill_value,
+        "storage_transformers": storage_transformers,
     }
 
     if attributes is not None:
@@ -244,9 +248,16 @@
     metadata = ArrayV3Metadata.from_dict(metadata_dict)
     observed = metadata.to_dict()
     expected = metadata_dict.copy()
+
+    # if unset or None or (), storage_transformers gets normalized to ()
+    assert observed["storage_transformers"] == ()
+    observed.pop("storage_transformers")
+    expected.pop("storage_transformers")
+
    if attributes is None:
         assert observed["attributes"] == {}
         observed.pop("attributes")
+
     if dimension_separator is None:
         if chunk_key_encoding == "default":
             expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()