|
2 | 2 |
|
3 | 3 | from collections.abc import Iterable, Iterator
|
4 | 4 | from dataclasses import dataclass
|
5 |
| -from itertools import islice |
6 |
| -from typing import TYPE_CHECKING, TypeVar |
| 5 | +from itertools import islice, pairwise |
| 6 | +from typing import TYPE_CHECKING, Any, TypeVar |
7 | 7 | from warnings import warn
|
8 | 8 |
|
| 9 | +import numpy as np |
| 10 | + |
9 | 11 | from zarr.abc.codec import (
|
10 | 12 | ArrayArrayCodec,
|
11 | 13 | ArrayBytesCodec,
|
|
17 | 19 | )
|
18 | 20 | from zarr.abc.store import ByteGetter, ByteSetter
|
19 | 21 | from zarr.buffer import Buffer, BufferPrototype, NDBuffer
|
| 22 | +from zarr.chunk_grids import ChunkGrid |
20 | 23 | from zarr.codecs.registry import get_codec_class
|
21 |
| -from zarr.common import JSON, concurrent_map, parse_named_configuration |
| 24 | +from zarr.common import JSON, ChunkCoords, concurrent_map, parse_named_configuration |
22 | 25 | from zarr.config import config
|
23 | 26 | from zarr.indexing import SelectorTuple, is_scalar, is_total_slice
|
24 |
| -from zarr.metadata import ArrayMetadata |
25 | 27 |
|
26 | 28 | if TYPE_CHECKING:
|
27 | 29 | from typing_extensions import Self
|
@@ -87,54 +89,11 @@ def to_dict(self) -> JSON:
|
87 | 89 | return [c.to_dict() for c in self]
|
88 | 90 |
|
89 | 91 | def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self:
|
90 |
| - return type(self).from_list([c.evolve_from_array_spec(array_spec) for c in self]) |
91 |
| - |
92 |
| - @staticmethod |
93 |
| - def codecs_from_list( |
94 |
| - codecs: list[Codec], |
95 |
| - ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: |
96 |
| - from zarr.codecs.sharding import ShardingCodec |
97 |
| - |
98 |
| - if not any(isinstance(codec, ArrayBytesCodec) for codec in codecs): |
99 |
| - raise ValueError("Exactly one array-to-bytes codec is required.") |
100 |
| - |
101 |
| - prev_codec: Codec | None = None |
102 |
| - for codec in codecs: |
103 |
| - if prev_codec is not None: |
104 |
| - if isinstance(codec, ArrayBytesCodec) and isinstance(prev_codec, ArrayBytesCodec): |
105 |
| - raise ValueError( |
106 |
| - f"ArrayBytesCodec '{type(codec)}' cannot follow after ArrayBytesCodec '{type(prev_codec)}' because exactly 1 ArrayBytesCodec is allowed." |
107 |
| - ) |
108 |
| - if isinstance(codec, ArrayBytesCodec) and isinstance(prev_codec, BytesBytesCodec): |
109 |
| - raise ValueError( |
110 |
| - f"ArrayBytesCodec '{type(codec)}' cannot follow after BytesBytesCodec '{type(prev_codec)}'." |
111 |
| - ) |
112 |
| - if isinstance(codec, ArrayArrayCodec) and isinstance(prev_codec, ArrayBytesCodec): |
113 |
| - raise ValueError( |
114 |
| - f"ArrayArrayCodec '{type(codec)}' cannot follow after ArrayBytesCodec '{type(prev_codec)}'." |
115 |
| - ) |
116 |
| - if isinstance(codec, ArrayArrayCodec) and isinstance(prev_codec, BytesBytesCodec): |
117 |
| - raise ValueError( |
118 |
| - f"ArrayArrayCodec '{type(codec)}' cannot follow after BytesBytesCodec '{type(prev_codec)}'." |
119 |
| - ) |
120 |
| - prev_codec = codec |
121 |
| - |
122 |
| - if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(codecs) > 1: |
123 |
| - warn( |
124 |
| - "Combining a `sharding_indexed` codec disables partial reads and " |
125 |
| - "writes, which may lead to inefficient performance.", |
126 |
| - stacklevel=3, |
127 |
| - ) |
128 |
| - |
129 |
| - return ( |
130 |
| - tuple(codec for codec in codecs if isinstance(codec, ArrayArrayCodec)), |
131 |
| - next(codec for codec in codecs if isinstance(codec, ArrayBytesCodec)), |
132 |
| - tuple(codec for codec in codecs if isinstance(codec, BytesBytesCodec)), |
133 |
| - ) |
| 92 | + return type(self).from_list([c.evolve_from_array_spec(array_spec=array_spec) for c in self]) |
134 | 93 |
|
135 | 94 | @classmethod
|
136 |
| - def from_list(cls, codecs: list[Codec], *, batch_size: int | None = None) -> Self: |
137 |
| - array_array_codecs, array_bytes_codec, bytes_bytes_codecs = cls.codecs_from_list(codecs) |
| 95 | + def from_list(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self: |
| 96 | + array_array_codecs, array_bytes_codec, bytes_bytes_codecs = codecs_from_list(codecs) |
138 | 97 |
|
139 | 98 | return cls(
|
140 | 99 | array_array_codecs=array_array_codecs,
|
@@ -180,9 +139,9 @@ def __iter__(self) -> Iterator[Codec]:
|
180 | 139 | yield self.array_bytes_codec
|
181 | 140 | yield from self.bytes_bytes_codecs
|
182 | 141 |
|
183 |
| - def validate(self, array_metadata: ArrayMetadata) -> None: |
| 142 | + def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: |
184 | 143 | for codec in self:
|
185 |
| - codec.validate(array_metadata) |
| 144 | + codec.validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid) |
186 | 145 |
|
187 | 146 | def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int:
|
188 | 147 | for codec in self:
|
@@ -509,3 +468,64 @@ async def write(
|
509 | 468 | self.write_batch,
|
510 | 469 | config.get("async.concurrency"),
|
511 | 470 | )
|
| 471 | + |
| 472 | + |
def codecs_from_list(
    codecs: Iterable[Codec],
) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]:
    """
    Split an ordered collection of codecs into its three pipeline stages.

    The codecs must appear in pipeline order: zero or more array-to-array
    codecs, then exactly one array-to-bytes codec, then zero or more
    bytes-to-bytes codecs.

    Parameters
    ----------
    codecs : Iterable[Codec]
        The codecs, in the order they are applied when encoding. Any iterable
        is accepted, including one-shot iterators and generators.

    Returns
    -------
    tuple
        A 3-tuple ``(array_array_codecs, array_bytes_codec, bytes_bytes_codecs)``
        where the first and last elements are (possibly empty) tuples.

    Raises
    ------
    ValueError
        If the codecs are out of order, if more than one array-to-bytes codec
        is given, or if no array-to-bytes codec is given.
    AssertionError
        If an element is not an instance of ``ArrayArrayCodec``,
        ``ArrayBytesCodec`` or ``BytesBytesCodec``.

    Warns
    -----
    UserWarning
        If a ``ShardingCodec`` is combined with any other codec.
    """
    # Imported here rather than at module level, as in the original code.
    from zarr.codecs.sharding import ShardingCodec

    # Materialize once: `codecs` may be a one-shot iterator, and it is scanned
    # several times below. Without this, the first scan would exhaust it and the
    # function would wrongly report that no ArrayBytesCodec was supplied.
    codec_seq: tuple[Codec, ...] = tuple(codecs)

    array_array: tuple[ArrayArrayCodec, ...] = ()
    array_bytes_maybe: ArrayBytesCodec | None = None
    bytes_bytes: tuple[BytesBytesCodec, ...] = ()

    if len(codec_seq) > 1 and any(isinstance(codec, ShardingCodec) for codec in codec_seq):
        warn(
            "Combining a `sharding_indexed` codec disables partial reads and "
            "writes, which may lead to inefficient performance.",
            stacklevel=3,
        )

    # Prepending `None` lets every codec, including the first, be inspected
    # together with its predecessor.
    for prev_codec, cur_codec in pairwise((None, *codec_seq)):
        if isinstance(cur_codec, ArrayArrayCodec):
            if isinstance(prev_codec, ArrayBytesCodec | BytesBytesCodec):
                msg = (
                    f"Invalid codec order. ArrayArrayCodec {cur_codec} "
                    "must be preceded by another ArrayArrayCodec. "
                    f"Got {type(prev_codec)} instead."
                )
                raise ValueError(msg)
            array_array += (cur_codec,)

        elif isinstance(cur_codec, ArrayBytesCodec):
            if isinstance(prev_codec, BytesBytesCodec):
                msg = (
                    f"Invalid codec order. ArrayBytes codec {cur_codec}"
                    f" must be preceded by an ArrayArrayCodec. Got {type(prev_codec)} instead."
                )
                raise ValueError(msg)

            if array_bytes_maybe is not None:
                msg = (
                    f"Got two instances of ArrayBytesCodec: {array_bytes_maybe} and {cur_codec}. "
                    "Only one array-to-bytes codec is allowed."
                )
                raise ValueError(msg)

            array_bytes_maybe = cur_codec

        elif isinstance(cur_codec, BytesBytesCodec):
            if isinstance(prev_codec, ArrayArrayCodec):
                msg = (
                    f"Invalid codec order. BytesBytesCodec {cur_codec} "
                    "must be preceded by either another BytesBytesCodec, or an ArrayBytesCodec. "
                    f"Got {type(prev_codec)} instead."
                )
                # The original built this message but never raised it.
                raise ValueError(msg)
            bytes_bytes += (cur_codec,)

        else:
            # Not one of the three known codec kinds.
            raise AssertionError(f"Unexpected codec type: {type(cur_codec)}")

    if array_bytes_maybe is None:
        raise ValueError("Required ArrayBytesCodec was not found.")

    return array_array, array_bytes_maybe, bytes_bytes
0 commit comments