From 8523840b7d058d2e855eb1cc7f3141685dc24e68 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 12 Sep 2024 23:29:17 +0200 Subject: [PATCH 1/3] feat: meager support for storage transformers metadata --- src/zarr/core/metadata/v3.py | 26 +++++++++++++++++++++++++ tests/v3/test_metadata/test_v3.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 195c3bd0a2..41c2b78125 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING, cast, overload if TYPE_CHECKING: @@ -70,6 +71,27 @@ def parse_dimension_names(data: None | Iterable[str | None]) -> tuple[str | None raise TypeError(msg) +def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: + if data is None: + return () + if isinstance(data, Iterable): + if len(tuple(data)) >= 1: + msg = ( + "Got a non-null storage_transformers keyword argument. " + "Storage transformers are not supported by zarr-python at this time. " + "The storage transformer(s) will be retained in array metadata but will not " + "influence storage routines" + ) + warnings.warn(msg, UserWarning, stacklevel=1) + return data # type: ignore[return-value] + else: + return () + else: + raise TypeError( + f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead." + ) + + @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(ArrayMetadata): shape: ChunkCoords @@ -82,6 +104,7 @@ class ArrayV3Metadata(ArrayMetadata): dimension_names: tuple[str, ...] | None = None zarr_format: Literal[3] = field(default=3, init=False) node_type: Literal["array"] = field(default="array", init=False) + storage_transformers: tuple[dict[str, JSON], ...] def __init__( self, @@ -94,6 +117,7 @@ def __init__( codecs: Iterable[Codec | dict[str, JSON]], attributes: None | dict[str, JSON], dimension_names: None | Iterable[str], + storage_transformers: None | Iterable[dict[str, JSON]] = None, ) -> None: """ Because the class is a frozen dataclass, we set attributes using object.__setattr__ @@ -106,6 +130,7 @@ def __init__( fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) + storage_transformers_parsed = parse_storage_transformers(storage_transformers) array_spec = ArraySpec( shape=shape_parsed, @@ -124,6 +149,7 @@ def __init__( object.__setattr__(self, "dimension_names", dimension_names_parsed) object.__setattr__(self, "fill_value", fill_value_parsed) object.__setattr__(self, "attributes", attributes_parsed) + object.__setattr__(self, "storage_transformers", storage_transformers_parsed) self._validate_metadata() diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py index 0a545dfb9d..173356bfa8 100644 --- a/tests/v3/test_metadata/test_v3.py +++ b/tests/v3/test_metadata/test_v3.py @@ -14,6 +14,7 @@ from typing import Any from zarr.abc.codec import Codec + from zarr.core.common import JSON import numpy as np @@ -172,6 +173,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) @pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"]) @pytest.mark.parametrize("dimension_separator", [".", "/", None]) @pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"]) +@pytest.mark.parametrize("storage_transformers", [None, ()]) def test_metadata_to_dict( chunk_grid: str, codecs: list[Codec], @@ -180,6 +182,7 @@ def test_metadata_to_dict( dimension_separator: Literal[".", "/"] | None, dimension_names: Literal["nones", "strings", "missing"], attributes: None | dict[str, Any], + storage_transformers: None | tuple[dict[str, JSON]], ) -> None: shape = (1, 2, 3) data_type = "uint8" @@ -210,6 +213,7 @@ def test_metadata_to_dict( "chunk_key_encoding": cke, "codecs": tuple(c.to_dict() for c in codecs), "fill_value": fill_value, + "storage_transformers": storage_transformers, } if attributes is not None: @@ -220,9 +224,16 @@ def test_metadata_to_dict( metadata = ArrayV3Metadata.from_dict(metadata_dict) observed = metadata.to_dict() expected = metadata_dict.copy() + + # if unset or None or (), storage_transformers gets normalized to () + assert observed["storage_transformers"] == () + observed.pop("storage_transformers") + expected.pop("storage_transformers") + if attributes is None: assert observed["attributes"] == {} observed.pop("attributes") + if dimension_separator is None: if chunk_key_encoding == "default": expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict() @@ -253,3 +264,24 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None: result = json.loads(d["zarr.json"].to_bytes()) assert result["fill_value"] == fill_value + + +def test_storage_transformers() -> None: + """ + Test that providing an actual storage transformer produces a warning and otherwise passes through + """ + metadata_dict = { + "zarr_format": 3, + "node_type": "array", + "shape": (10,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": "uint8", + "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, + "codecs": (BytesCodec(),), + "fill_value": 0, + "storage_transformers": ({"test": "should_warn"}), + } + with pytest.warns(): + meta = ArrayV3Metadata.from_dict(metadata_dict) + + assert meta.to_dict()["storage_transformers"] == metadata_dict["storage_transformers"] From 9981eeac9e1972da50f6ad652c21179307bf14f2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 25 Sep 2024 21:51:00 +0200 Subject: [PATCH 2/3] remove warning, and instead error when creating v3 arrays with storage transformers --- src/zarr/core/array.py | 9 ++++++++- src/zarr/core/metadata/v3.py | 19 +++++++------------ tests/v3/test_array.py | 24 +++++++++++++++++++++++- tests/v3/test_metadata/test_v3.py | 21 --------------------- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2cf6d69404..25a9c7fb19 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -79,7 +79,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata: return data elif isinstance(data, dict): if data["zarr_format"] == 3: - return ArrayV3Metadata.from_dict(data) + meta_out = ArrayV3Metadata.from_dict(data) + if len(meta_out.storage_transformers) > 0: + msg = ( + f"Array metadata contains storage transformers: {meta_out.storage_transformers}." + "Arrays with storage transformers are not supported in zarr-python at this time." + ) + raise ValueError(msg) + return meta_out elif data["zarr_format"] == 2: return ArrayV2Metadata.from_dict(data) raise TypeError diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index da433fb3bf..38aad8d6cb 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from typing import TYPE_CHECKING, cast, overload if TYPE_CHECKING: @@ -72,24 +71,20 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None: def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: + """ + Parse storage_transformers. Zarr python cannot use storage transformers + at this time, so this function doesn't attempt to validate them. + """ if data is None: return () if isinstance(data, Iterable): if len(tuple(data)) >= 1: - msg = ( - "Got a non-null storage_transformers keyword argument. " - "Storage transformers are not supported by zarr-python at this time. " - "The storage transformer(s) will be retained in array metadata but will not " - "influence storage routines" - ) - warnings.warn(msg, UserWarning, stacklevel=1) return data # type: ignore[return-value] else: return () - else: - raise TypeError( - f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead." - ) + raise TypeError( + f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead." + ) @dataclass(frozen=True, kw_only=True) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index b7beb63b1c..cd4b23896f 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -5,7 +5,8 @@ import pytest from zarr import Array, AsyncArray, Group -from zarr.core.common import ZarrFormat +from zarr.codecs.bytes import BytesCodec +from zarr.core.common import JSON, ZarrFormat from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.store import LocalStore, MemoryStore from zarr.store.common import StorePath @@ -188,3 +189,24 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> assert actual == expected np.testing.assert_array_equal(actual[:], expected[:]) + + +@pytest.mark.parametrize("store", ("memory",), indirect=True) +def test_storage_transformers(store: MemoryStore) -> None: + """ + Test that providing an actual storage transformer produces a warning and otherwise passes through + """ + metadata_dict: dict[str, JSON] = { + "zarr_format": 3, + "node_type": "array", + "shape": (10,), + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, + "data_type": "uint8", + "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, + "codecs": (BytesCodec().to_dict(),), + "fill_value": 0, + "storage_transformers": ({"test": "should_raise"}), + } + match = "Arrays with storage transformers are not supported in zarr-python at this time." + with pytest.raises(ValueError, match=match): + Array.from_dict(StorePath(store), data=metadata_dict) diff --git a/tests/v3/test_metadata/test_v3.py b/tests/v3/test_metadata/test_v3.py index 173356bfa8..f4a9d7e370 100644 --- a/tests/v3/test_metadata/test_v3.py +++ b/tests/v3/test_metadata/test_v3.py @@ -264,24 +264,3 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None: result = json.loads(d["zarr.json"].to_bytes()) assert result["fill_value"] == fill_value - - -def test_storage_transformers() -> None: - """ - Test that providing an actual storage transformer produces a warning and otherwise passes through - """ - metadata_dict = { - "zarr_format": 3, - "node_type": "array", - "shape": (10,), - "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": "uint8", - "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, - "codecs": (BytesCodec(),), - "fill_value": 0, - "storage_transformers": ({"test": "should_warn"}), - } - with pytest.warns(): - meta = ArrayV3Metadata.from_dict(metadata_dict) - - assert meta.to_dict()["storage_transformers"] == metadata_dict["storage_transformers"] From d542e618559cb27f40fa61adf8a0f45411e81136 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 27 Sep 2024 07:58:08 +0200 Subject: [PATCH 3/3] unbreak test fixture --- tests/v3/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 7774acca3b..6224bc39e3 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -239,7 +239,7 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> np.testing.assert_array_equal(actual[:], expected[:]) -@pytest.mark.parametrize("store", ["memory",), indirect=True) +@pytest.mark.parametrize("store", ["memory"], indirect=True) def test_storage_transformers(store: MemoryStore) -> None: """ Test that providing an actual storage transformer produces a warning and otherwise passes through