Skip to content

Commit 6b11bb8

Browse files
d-v-b and jhamman authored
Refactor/rename v2 metadata fields (#2301)
* Remove array v2 metadata `data_type` and `chunk_grid` attributes, using `dtype` and `chunks` instead
* Remove inheritance from `ArrayMetadata`
* Make `AsyncArray` generic w.r.t. metadata
* Copy Tom Augspurger's overload
* Minimal TypedDicts for array metadata
* Clean up imports
* Make `chunk_grid` a cached property for v2 metadata
* Add T suffix to type aliases
* Make `AsyncArray.create` overloads more specific, and add some comments
* Use regular names for unions, but use T_ prefix for typevar

---------

Co-authored-by: Joe Hamman <[email protected]>
1 parent 395604d commit 6b11bb8

File tree

8 files changed

+342
-173
lines changed

8 files changed

+342
-173
lines changed

src/zarr/api/asynchronous.py

Lines changed: 44 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,7 @@
1212
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat
1313
from zarr.core.config import config
1414
from zarr.core.group import AsyncGroup
15-
from zarr.core.metadata.v2 import ArrayV2Metadata
16-
from zarr.core.metadata.v3 import ArrayV3Metadata
15+
from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
1716
from zarr.errors import NodeTypeValidationError
1817
from zarr.storage import (
1918
StoreLike,
@@ -29,7 +28,7 @@
2928
from zarr.core.chunk_key_encodings import ChunkKeyEncoding
3029

3130
# TODO: this type could use some more thought
32-
ArrayLike = AsyncArray | Array | npt.NDArray[Any]
31+
ArrayLike = AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array | npt.NDArray[Any]
3332
PathLike = str
3433

3534
__all__ = [
@@ -98,11 +97,11 @@ def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]:
9897
if isinstance(a.metadata, ArrayV2Metadata):
9998
new["compressor"] = a.metadata.compressor
10099
new["filters"] = a.metadata.filters
101-
102-
if isinstance(a.metadata, ArrayV3Metadata):
103-
new["codecs"] = a.metadata.codecs
104100
else:
105-
raise TypeError(f"Unsupported zarr format: {a.metadata.zarr_format}")
101+
# TODO: Remove type: ignore statement when type inference improves.
102+
# mypy cannot correctly infer the type of a.metadata here for some reason.
103+
new["codecs"] = a.metadata.codecs # type: ignore[unreachable]
104+
106105
else:
107106
# TODO: set default values compressor/codecs
108107
# to do this, we may need to evaluate if this is a v2 or v3 array
@@ -199,7 +198,7 @@ async def open(
199198
path: str | None = None,
200199
storage_options: dict[str, Any] | None = None,
201200
**kwargs: Any, # TODO: type kwargs as valid args to open_array
202-
) -> AsyncArray | AsyncGroup:
201+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup:
203202
"""Convenience function to open a group or array using file-mode-like semantics.
204203
205204
Parameters
@@ -237,11 +236,13 @@ async def open(
237236
if "shape" not in kwargs and mode in {"a", "w", "w-"}:
238237
try:
239238
metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format)
239+
# TODO: remove this cast when we fix typing for array metadata dicts
240+
_metadata_dict = cast(ArrayMetadataDict, metadata_dict)
240241
# for v2, the above would already have raised an exception if not an array
241-
zarr_format = metadata_dict["zarr_format"]
242-
is_v3_array = zarr_format == 3 and metadata_dict.get("node_type") == "array"
242+
zarr_format = _metadata_dict["zarr_format"]
243+
is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array"
243244
if is_v3_array or zarr_format == 2:
244-
return AsyncArray(store_path=store_path, metadata=metadata_dict)
245+
return AsyncArray(store_path=store_path, metadata=_metadata_dict)
245246
except (AssertionError, FileNotFoundError):
246247
pass
247248
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
@@ -404,7 +405,9 @@ async def tree(*args: Any, **kwargs: Any) -> None:
404405
raise NotImplementedError
405406

406407

407-
async def array(data: npt.ArrayLike, **kwargs: Any) -> AsyncArray:
408+
async def array(
409+
data: npt.ArrayLike, **kwargs: Any
410+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
408411
"""Create an array filled with `data`.
409412
410413
Parameters
@@ -658,7 +661,7 @@ async def create(
658661
dimension_names: Iterable[str] | None = None,
659662
storage_options: dict[str, Any] | None = None,
660663
**kwargs: Any,
661-
) -> AsyncArray:
664+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
662665
"""Create an array.
663666
664667
Parameters
@@ -810,7 +813,9 @@ async def create(
810813
)
811814

812815

813-
async def empty(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
816+
async def empty(
817+
shape: ChunkCoords, **kwargs: Any
818+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
814819
"""Create an empty array.
815820
816821
Parameters
@@ -829,7 +834,9 @@ async def empty(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
829834
return await create(shape=shape, fill_value=None, **kwargs)
830835

831836

832-
async def empty_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
837+
async def empty_like(
838+
a: ArrayLike, **kwargs: Any
839+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
833840
"""Create an empty array like `a`.
834841
835842
Parameters
@@ -849,7 +856,9 @@ async def empty_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
849856

850857

851858
# TODO: add type annotations for fill_value and kwargs
852-
async def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> AsyncArray:
859+
async def full(
860+
shape: ChunkCoords, fill_value: Any, **kwargs: Any
861+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
853862
"""Create an array, with `fill_value` being used as the default value for
854863
uninitialized portions of the array.
855864
@@ -871,7 +880,9 @@ async def full(shape: ChunkCoords, fill_value: Any, **kwargs: Any) -> AsyncArray
871880

872881

873882
# TODO: add type annotations for kwargs
874-
async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
883+
async def full_like(
884+
a: ArrayLike, **kwargs: Any
885+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
875886
"""Create a filled array like `a`.
876887
877888
Parameters
@@ -892,7 +903,9 @@ async def full_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
892903
return await full(**like_kwargs)
893904

894905

895-
async def ones(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
906+
async def ones(
907+
shape: ChunkCoords, **kwargs: Any
908+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
896909
"""Create an array, with one being used as the default value for
897910
uninitialized portions of the array.
898911
@@ -911,7 +924,9 @@ async def ones(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
911924
return await create(shape=shape, fill_value=1, **kwargs)
912925

913926

914-
async def ones_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
927+
async def ones_like(
928+
a: ArrayLike, **kwargs: Any
929+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
915930
"""Create an array of ones like `a`.
916931
917932
Parameters
@@ -938,7 +953,7 @@ async def open_array(
938953
path: PathLike | None = None,
939954
storage_options: dict[str, Any] | None = None,
940955
**kwargs: Any, # TODO: type kwargs as valid args to save
941-
) -> AsyncArray:
956+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
942957
"""Open an array using file-mode-like semantics.
943958
944959
Parameters
@@ -981,7 +996,9 @@ async def open_array(
981996
raise
982997

983998

984-
async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray:
999+
async def open_like(
1000+
a: ArrayLike, path: str, **kwargs: Any
1001+
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
9851002
"""Open a persistent array like `a`.
9861003
9871004
Parameters
@@ -1004,7 +1021,9 @@ async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AsyncArray:
10041021
return await open_array(path=path, **like_kwargs)
10051022

10061023

1007-
async def zeros(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
1024+
async def zeros(
1025+
shape: ChunkCoords, **kwargs: Any
1026+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
10081027
"""Create an array, with zero being used as the default value for
10091028
uninitialized portions of the array.
10101029
@@ -1023,7 +1042,9 @@ async def zeros(shape: ChunkCoords, **kwargs: Any) -> AsyncArray:
10231042
return await create(shape=shape, fill_value=0, **kwargs)
10241043

10251044

1026-
async def zeros_like(a: ArrayLike, **kwargs: Any) -> AsyncArray:
1045+
async def zeros_like(
1046+
a: ArrayLike, **kwargs: Any
1047+
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
10271048
"""Create an array of zeros like `a`.
10281049
10291050
Parameters

0 commit comments

Comments
 (0)