|
3 | 3 | import asyncio
|
4 | 4 | import json
|
5 | 5 | import logging
|
| 6 | +from collections import defaultdict |
6 | 7 | from dataclasses import asdict, dataclass, field, replace
|
7 | 8 | from typing import TYPE_CHECKING, Literal, cast, overload
|
8 | 9 |
|
|
28 | 29 | parse_shapelike,
|
29 | 30 | )
|
30 | 31 | from zarr.core.config import config
|
31 |
| -from zarr.core.metadata import ArrayMetadata, ArrayV3Metadata |
| 32 | +from zarr.core.metadata import ArrayMetadata, ArrayV2Metadata, ArrayV3Metadata |
32 | 33 | from zarr.core.sync import SyncMixin, sync
|
33 | 34 | from zarr.store import StoreLike, StorePath, make_store_path
|
34 | 35 | from zarr.store.common import ensure_no_existing_node
|
@@ -126,7 +127,14 @@ def from_dict(cls, data: dict[str, JSON]) -> ConsolidatedMetadata:
|
126 | 127 | elif node_type == "array":
|
127 | 128 | metadata[k] = ArrayV3Metadata.from_dict(v)
|
128 | 129 | else:
|
129 |
| - raise ValueError(f"Invalid node_type: '{node_type}'") |
| 130 | + # We either have V2 metadata, or invalid metadata |
| 131 | + if "shape" in v: |
| 132 | + # probably ArrayV2Metadata |
| 133 | + metadata[k] = ArrayV2Metadata.from_dict(v) |
| 134 | + else: |
| 135 | + # probably v2 Group metadata |
| 136 | + metadata[k] = GroupMetadata.from_dict(v) |
| 137 | + |
130 | 138 | # assert data["kind"] == "inline"
|
131 | 139 | if data["kind"] != "inline":
|
132 | 140 | raise ValueError
|
@@ -226,15 +234,26 @@ async def open(
|
226 | 234 | cls,
|
227 | 235 | store: StoreLike,
|
228 | 236 | zarr_format: Literal[2, 3, None] = 3,
|
| 237 | + open_consolidated: bool = False, |
229 | 238 | ) -> AsyncGroup:
|
230 | 239 | store_path = await make_store_path(store)
|
231 | 240 |
|
232 | 241 | if zarr_format == 2:
|
233 |
| - zgroup_bytes, zattrs_bytes = await asyncio.gather( |
234 |
| - (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() |
| 242 | + paths = [store_path / ZGROUP_JSON, store_path / ZATTRS_JSON] |
| 243 | + if open_consolidated: |
| 244 | + paths.append(store_path / ".zmetadata") # todo: configurable |
| 245 | + |
| 246 | + zgroup_bytes, zattrs_bytes, *rest = await asyncio.gather( |
| 247 | + *[path.get() for path in paths] |
235 | 248 | )
|
236 | 249 | if zgroup_bytes is None:
|
237 | 250 | raise FileNotFoundError(store_path)
|
| 251 | + |
| 252 | + if open_consolidated: |
| 253 | + consolidated_metadata_bytes = rest[0] |
| 254 | + if consolidated_metadata_bytes is None: |
| 255 | + raise FileNotFoundError(paths[-1]) |
| 256 | + |
238 | 257 | elif zarr_format == 3:
|
239 | 258 | zarr_json_bytes = await (store_path / ZARR_JSON).get()
|
240 | 259 | if zarr_json_bytes is None:
|
@@ -265,6 +284,37 @@ async def open(
|
265 | 284 | zgroup = json.loads(zgroup_bytes.to_bytes())
|
266 | 285 | zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {}
|
267 | 286 | group_metadata = {**zgroup, "attributes": zattrs}
|
| 287 | + |
| 288 | + if open_consolidated: |
| 289 | + # this *should* be defined. |
| 290 | + assert consolidated_metadata_bytes is not None # already checked above |
| 291 | + |
| 292 | + v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes()) |
| 293 | + v2_consolidated_metadata = v2_consolidated_metadata["metadata"] |
| 294 | + # We already read zattrs and zgroup. Should we ignore these? |
| 295 | + v2_consolidated_metadata.pop(".zattrs") |
| 296 | + v2_consolidated_metadata.pop(".zgroup") |
| 297 | + |
| 298 | + consolidated_metadata: defaultdict[str, dict[str, Any]] = defaultdict(dict) |
| 299 | + |
| 300 | + # keys like air/.zarray, air/.zattrs |
| 301 | + for k, v in v2_consolidated_metadata.items(): |
| 302 | + path, kind = k.rsplit("/.", 1) |
| 303 | + |
| 304 | + if kind == "zarray": |
| 305 | + consolidated_metadata[path].update(v) |
| 306 | + elif kind == "zattrs": |
| 307 | + consolidated_metadata[path]["attributes"] = v |
| 308 | + elif kind == "zgroup": |
| 309 | + consolidated_metadata[path].update(v) |
| 310 | + else: |
| 311 | + raise ValueError(f"Invalid file type '{kind}' at path '{path}") |
| 312 | + group_metadata["consolidated_metadata"] = { |
| 313 | + "metadata": dict(consolidated_metadata), |
| 314 | + "kind": "inline", |
| 315 | + "must_understand": False, |
| 316 | + } |
| 317 | + |
268 | 318 | else:
|
269 | 319 | # V3 groups are comprised of a zarr.json object
|
270 | 320 | assert zarr_json_bytes is not None
|
|
0 commit comments