Skip to content

feat: metadata-only support for storage transformers metadata #2180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,14 @@ def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata:
return data
elif isinstance(data, dict):
if data["zarr_format"] == 3:
return ArrayV3Metadata.from_dict(data)
meta_out = ArrayV3Metadata.from_dict(data)
if len(meta_out.storage_transformers) > 0:
msg = (
f"Array metadata contains storage transformers: {meta_out.storage_transformers}."
"Arrays with storage transformers are not supported in zarr-python at this time."
)
raise ValueError(msg)
return meta_out
elif data["zarr_format"] == 2:
return ArrayV2Metadata.from_dict(data)
raise TypeError
Expand Down
21 changes: 21 additions & 0 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@ def parse_dimension_names(data: object) -> tuple[str | None, ...] | None:
raise TypeError(msg)


def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
    """
    Parse the ``storage_transformers`` field of array metadata.

    Zarr-python cannot use storage transformers at this time, so this function
    does not attempt to validate the individual transformer dicts — it only
    normalizes the container.

    Parameters
    ----------
    data : object
        ``None``, or an iterable of storage-transformer dicts.

    Returns
    -------
    tuple[dict[str, JSON], ...]
        The storage transformers as a tuple; ``()`` when ``data`` is ``None``
        or empty.

    Raises
    ------
    TypeError
        If ``data`` is neither ``None`` nor an iterable.
    """
    if data is None:
        return ()
    if isinstance(data, Iterable):
        # Materialize exactly once. The previous implementation called
        # tuple(data) just to check the length and then returned the raw
        # iterable — which, for a one-shot iterator, had already been
        # exhausted. Returning the tuple also makes the runtime value match
        # the declared return type (no type: ignore needed).
        return tuple(data)
    raise TypeError(
        f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead."
    )


class V3JsonEncoder(json.JSONEncoder):
def __init__(self, *args: Any, **kwargs: Any) -> None:
self.indent = kwargs.pop("indent", config.get("json_indent"))
Expand Down Expand Up @@ -144,6 +161,7 @@ class ArrayV3Metadata(ArrayMetadata):
dimension_names: tuple[str, ...] | None = None
zarr_format: Literal[3] = field(default=3, init=False)
node_type: Literal["array"] = field(default="array", init=False)
storage_transformers: tuple[dict[str, JSON], ...]

def __init__(
self,
Expand All @@ -156,6 +174,7 @@ def __init__(
codecs: Iterable[Codec | dict[str, JSON]],
attributes: None | dict[str, JSON],
dimension_names: None | Iterable[str],
storage_transformers: None | Iterable[dict[str, JSON]] = None,
) -> None:
"""
Because the class is a frozen dataclass, we set attributes using object.__setattr__
Expand All @@ -168,6 +187,7 @@ def __init__(
fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
attributes_parsed = parse_attributes(attributes)
codecs_parsed_partial = parse_codecs(codecs)
storage_transformers_parsed = parse_storage_transformers(storage_transformers)

array_spec = ArraySpec(
shape=shape_parsed,
Expand All @@ -186,6 +206,7 @@ def __init__(
object.__setattr__(self, "dimension_names", dimension_names_parsed)
object.__setattr__(self, "fill_value", fill_value_parsed)
object.__setattr__(self, "attributes", attributes_parsed)
object.__setattr__(self, "storage_transformers", storage_transformers_parsed)

self._validate_metadata()

Expand Down
24 changes: 23 additions & 1 deletion tests/v3/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
import pytest

from zarr import Array, AsyncArray, Group
from zarr.codecs.bytes import BytesCodec
from zarr.core.array import chunks_initialized
from zarr.core.buffer.cpu import NDBuffer
from zarr.core.common import ZarrFormat
from zarr.core.common import JSON, ZarrFormat
from zarr.core.indexing import ceildiv
from zarr.core.sync import sync
from zarr.errors import ContainsArrayError, ContainsGroupError
Expand Down Expand Up @@ -238,6 +239,27 @@ def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) ->
np.testing.assert_array_equal(actual[:], expected[:])


@pytest.mark.parametrize("store", ["memory"], indirect=True)
def test_storage_transformers(store: MemoryStore) -> None:
    """
    Test that array metadata containing storage transformers raises a ValueError,
    because zarr-python does not support storage transformers at this time.
    """
    metadata_dict: dict[str, JSON] = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec().to_dict(),),
        "fill_value": 0,
        # Trailing comma makes this a 1-tuple of dicts, matching the declared
        # element type of storage_transformers. Without it, the parenthesized
        # expression was a bare dict that only passed parsing incidentally
        # (a dict is iterable over its keys).
        "storage_transformers": ({"test": "should_raise"},),
    }
    match = "Arrays with storage transformers are not supported in zarr-python at this time."
    with pytest.raises(ValueError, match=match):
        Array.from_dict(StorePath(store), data=metadata_dict)


@pytest.mark.parametrize("test_cls", [Array, AsyncArray])
@pytest.mark.parametrize("nchunks", [2, 5, 10])
def test_nchunks(test_cls: type[Array] | type[AsyncArray], nchunks: int) -> None:
Expand Down
11 changes: 11 additions & 0 deletions tests/v3/test_metadata/test_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Any

from zarr.abc.codec import Codec
from zarr.core.common import JSON


import numpy as np
Expand Down Expand Up @@ -196,6 +197,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
@pytest.mark.parametrize("storage_transformers", [None, ()])
def test_metadata_to_dict(
chunk_grid: str,
codecs: list[Codec],
Expand All @@ -204,6 +206,7 @@ def test_metadata_to_dict(
dimension_separator: Literal[".", "/"] | None,
dimension_names: Literal["nones", "strings", "missing"],
attributes: None | dict[str, Any],
storage_transformers: None | tuple[dict[str, JSON]],
) -> None:
shape = (1, 2, 3)
data_type = "uint8"
Expand Down Expand Up @@ -234,6 +237,7 @@ def test_metadata_to_dict(
"chunk_key_encoding": cke,
"codecs": tuple(c.to_dict() for c in codecs),
"fill_value": fill_value,
"storage_transformers": storage_transformers,
}

if attributes is not None:
Expand All @@ -244,9 +248,16 @@ def test_metadata_to_dict(
metadata = ArrayV3Metadata.from_dict(metadata_dict)
observed = metadata.to_dict()
expected = metadata_dict.copy()

# if unset or None or (), storage_transformers gets normalized to ()
assert observed["storage_transformers"] == ()
observed.pop("storage_transformers")
expected.pop("storage_transformers")

if attributes is None:
assert observed["attributes"] == {}
observed.pop("attributes")

if dimension_separator is None:
if chunk_key_encoding == "default":
expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
Expand Down