Skip to content

Commit 629b4e5

Browse files
dstansby and d-v-b authored
Allow no compressor for v2 arrays (#3039)
* Allow no compressor for v2 arrays
* Use typing aliases for compressors
* Test v2 array w/ v3 codec errors
* Add changelog entry
* Update type comment
* fix test names

Co-authored-by: Davis Bennett <[email protected]>

---------

Co-authored-by: Davis Bennett <[email protected]>
1 parent 7584b96 commit 629b4e5

File tree

6 files changed

+70
-19
lines changed

6 files changed

+70
-19
lines changed

changes/3039.bugfix.rst

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
It is now possible to specify no compressor when creating a zarr format 2 array.
2+
This can be done by passing ``compressor=None`` to the various array creation routines.
3+
4+
The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given.
5+
To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead.

src/zarr/api/asynchronous.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
12+
from zarr.core.array import (
13+
Array,
14+
AsyncArray,
15+
CompressorLike,
16+
create_array,
17+
from_array,
18+
get_array_metadata,
19+
)
1320
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1421
from zarr.core.buffer import NDArrayLike
1522
from zarr.core.common import (
@@ -838,7 +845,7 @@ async def create(
838845
*, # Note: this is a change from v2
839846
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
840847
dtype: npt.DTypeLike | None = None,
841-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
848+
compressor: CompressorLike = "auto",
842849
fill_value: Any | None = 0, # TODO: need type
843850
order: MemoryOrder | None = None,
844851
store: str | StoreLike | None = None,
@@ -991,7 +998,7 @@ async def create(
991998
dtype = parse_dtype(dtype, zarr_format)
992999
if not filters:
9931000
filters = _default_filters(dtype)
994-
if not compressor:
1001+
if compressor == "auto":
9951002
compressor = _default_compressor(dtype)
9961003
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
9971004
if chunks is not None:

src/zarr/api/synchronous.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray
10+
from zarr.core.array import Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -599,7 +599,7 @@ def create(
599599
*, # Note: this is a change from v2
600600
chunks: ChunkCoords | int | bool | None = None,
601601
dtype: npt.DTypeLike | None = None,
602-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
602+
compressor: CompressorLike = "auto",
603603
fill_value: Any | None = 0, # TODO: need type
604604
order: MemoryOrder | None = None,
605605
store: str | StoreLike | None = None,

src/zarr/core/array.py

+27-11
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
T_ArrayMetadata,
103103
)
104104
from zarr.core.metadata.v2 import (
105+
CompressorLikev2,
105106
_default_compressor,
106107
_default_filters,
107108
parse_compressor,
@@ -303,7 +304,7 @@ async def create(
303304
dimension_separator: Literal[".", "/"] | None = None,
304305
order: MemoryOrder | None = None,
305306
filters: list[dict[str, JSON]] | None = None,
306-
compressor: dict[str, JSON] | None = None,
307+
compressor: CompressorLikev2 | Literal["auto"] = "auto",
307308
# runtime
308309
overwrite: bool = False,
309310
data: npt.ArrayLike | None = None,
@@ -394,7 +395,7 @@ async def create(
394395
dimension_separator: Literal[".", "/"] | None = None,
395396
order: MemoryOrder | None = None,
396397
filters: list[dict[str, JSON]] | None = None,
397-
compressor: dict[str, JSON] | None = None,
398+
compressor: CompressorLike = "auto",
398399
# runtime
399400
overwrite: bool = False,
400401
data: npt.ArrayLike | None = None,
@@ -429,7 +430,7 @@ async def create(
429430
dimension_separator: Literal[".", "/"] | None = None,
430431
order: MemoryOrder | None = None,
431432
filters: list[dict[str, JSON]] | None = None,
432-
compressor: dict[str, JSON] | None = None,
433+
compressor: CompressorLike = "auto",
433434
# runtime
434435
overwrite: bool = False,
435436
data: npt.ArrayLike | None = None,
@@ -570,7 +571,7 @@ async def _create(
570571
dimension_separator: Literal[".", "/"] | None = None,
571572
order: MemoryOrder | None = None,
572573
filters: list[dict[str, JSON]] | None = None,
573-
compressor: dict[str, JSON] | None = None,
574+
compressor: CompressorLike = "auto",
574575
# runtime
575576
overwrite: bool = False,
576577
data: npt.ArrayLike | None = None,
@@ -604,7 +605,7 @@ async def _create(
604605
raise ValueError(
605606
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
606607
)
607-
if compressor is not None:
608+
if compressor != "auto":
608609
raise ValueError(
609610
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
610611
)
@@ -768,7 +769,7 @@ def _create_metadata_v2(
768769
dimension_separator: Literal[".", "/"] | None = None,
769770
fill_value: float | None = None,
770771
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
771-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
772+
compressor: CompressorLikev2 = None,
772773
attributes: dict[str, JSON] | None = None,
773774
) -> ArrayV2Metadata:
774775
if dimension_separator is None:
@@ -809,7 +810,7 @@ async def _create_v2(
809810
dimension_separator: Literal[".", "/"] | None = None,
810811
fill_value: float | None = None,
811812
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
812-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
813+
compressor: CompressorLike = "auto",
813814
attributes: dict[str, JSON] | None = None,
814815
overwrite: bool = False,
815816
) -> AsyncArray[ArrayV2Metadata]:
@@ -821,6 +822,17 @@ async def _create_v2(
821822
else:
822823
await ensure_no_existing_node(store_path, zarr_format=2)
823824

825+
compressor_parsed: CompressorLikev2
826+
if compressor == "auto":
827+
compressor_parsed = _default_compressor(dtype)
828+
elif isinstance(compressor, BytesBytesCodec):
829+
raise ValueError(
830+
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
831+
"Use a numcodecs codec directly instead."
832+
)
833+
else:
834+
compressor_parsed = compressor
835+
824836
metadata = cls._create_metadata_v2(
825837
shape=shape,
826838
dtype=dtype,
@@ -829,7 +841,7 @@ async def _create_v2(
829841
dimension_separator=dimension_separator,
830842
fill_value=fill_value,
831843
filters=filters,
832-
compressor=compressor,
844+
compressor=compressor_parsed,
833845
attributes=attributes,
834846
)
835847

@@ -1751,7 +1763,7 @@ def create(
17511763
dimension_separator: Literal[".", "/"] | None = None,
17521764
order: MemoryOrder | None = None,
17531765
filters: list[dict[str, JSON]] | None = None,
1754-
compressor: dict[str, JSON] | None = None,
1766+
compressor: CompressorLike = "auto",
17551767
# runtime
17561768
overwrite: bool = False,
17571769
config: ArrayConfigLike | None = None,
@@ -1880,7 +1892,7 @@ def _create(
18801892
dimension_separator: Literal[".", "/"] | None = None,
18811893
order: MemoryOrder | None = None,
18821894
filters: list[dict[str, JSON]] | None = None,
1883-
compressor: dict[str, JSON] | None = None,
1895+
compressor: CompressorLike = "auto",
18841896
# runtime
18851897
overwrite: bool = False,
18861898
config: ArrayConfigLike | None = None,
@@ -3792,7 +3804,11 @@ def _get_default_codecs(
37923804
| Literal["auto"]
37933805
| None
37943806
)
3795-
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
3807+
# Union of acceptable types for users to pass in for both v2 and v3 compressors
3808+
CompressorLike: TypeAlias = (
3809+
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
3810+
)
3811+
37963812
CompressorsLike: TypeAlias = (
37973813
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
37983814
| dict[str, JSON]

src/zarr/core/metadata/v2.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Iterable, Sequence
66
from enum import Enum
77
from functools import cached_property
8-
from typing import TYPE_CHECKING, Any, TypedDict, cast
8+
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast
99

1010
import numcodecs.abc
1111

@@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict):
4343
attributes: dict[str, JSON]
4444

4545

46+
# Union of acceptable types for v2 compressors
47+
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None
48+
49+
4650
@dataclass(frozen=True, kw_only=True)
4751
class ArrayV2Metadata(Metadata):
4852
shape: ChunkCoords
@@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata):
5256
order: MemoryOrder = "C"
5357
filters: tuple[numcodecs.abc.Codec, ...] | None = None
5458
dimension_separator: Literal[".", "/"] = "."
55-
compressor: numcodecs.abc.Codec | None = None
59+
compressor: CompressorLikev2
5660
attributes: dict[str, JSON] = field(default_factory=dict)
5761
zarr_format: Literal[2] = field(init=False, default=2)
5862

@@ -65,7 +69,7 @@ def __init__(
6569
fill_value: Any,
6670
order: MemoryOrder,
6771
dimension_separator: Literal[".", "/"] = ".",
68-
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
72+
compressor: CompressorLikev2 = None,
6973
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
7074
attributes: dict[str, JSON] | None = None,
7175
) -> None:

tests/test_api.py

+19
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from typing import TYPE_CHECKING
44

5+
import zarr.codecs
6+
57
if TYPE_CHECKING:
68
import pathlib
79

@@ -1190,3 +1192,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
11901192
# assert_array_equal doesn't check the type
11911193
assert isinstance(result, type(src))
11921194
cp.testing.assert_array_equal(result, src[:10, :10])
1195+
1196+
1197+
def test_v2_without_compressor() -> None:
1198+
# Make sure it's possible to set no compressor for v2 arrays
1199+
arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None)
1200+
assert arr.compressors == ()
1201+
1202+
1203+
def test_v2_with_v3_compressor() -> None:
1204+
# Check trying to create a v2 array with a v3 compressor fails
1205+
with pytest.raises(
1206+
ValueError,
1207+
match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.",
1208+
):
1209+
zarr.create(
1210+
store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec()
1211+
)

0 commit comments

Comments
 (0)