Skip to content

Commit bce30dd

Browse files
authored
chore/handle numcodecs codecs (#3376)
* bring in contents of numcodecs.zarr3 * fix tests * fix docs * fix config test * make zarr.codecs.numcodecs * complete move to zarr.codecs.numcodecs * changelog * register codecs in codecs/__init__.py * remove old registration sites
1 parent e76b1e0 commit bce30dd

File tree

18 files changed

+863
-52
lines changed

18 files changed

+863
-52
lines changed

changes/3376.misc.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Define Zarr V3-specific codecs from numcodecs inside this repo. These codecs can be found in
2+
:mod:`zarr.codecs.numcodecs`. This is necessary to resolve a circular dependency between Zarr
3+
and Numcodecs.

docs/user-guide/config.rst

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,43 @@ requires the value of ``codecs.bytes.name`` to be ``'custompackage.NewBytesCodec
4242
This is the current default configuration::
4343

4444
>>> zarr.config.pprint()
45-
{'array': {'order': 'C',
46-
'write_empty_chunks': False},
47-
'async': {'concurrency': 10, 'timeout': None},
48-
'buffer': 'zarr.buffer.cpu.Buffer',
49-
'codec_pipeline': {'batch_size': 1,
50-
'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'},
51-
'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec',
45+
{'array': {'order': 'C', 'write_empty_chunks': False},
46+
'async': {'concurrency': 10, 'timeout': None},
47+
'buffer': 'zarr.buffer.cpu.Buffer',
48+
'codec_pipeline': {'batch_size': 1,
49+
'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'},
50+
'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec',
5251
'bytes': 'zarr.codecs.bytes.BytesCodec',
5352
'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec',
5453
'endian': 'zarr.codecs.bytes.BytesCodec',
5554
'gzip': 'zarr.codecs.gzip.GzipCodec',
55+
'numcodecs.adler32': 'zarr.codecs.numcodecs.Adler32',
56+
'numcodecs.astype': 'zarr.codecs.numcodecs.AsType',
57+
'numcodecs.bitround': 'zarr.codecs.numcodecs.BitRound',
58+
'numcodecs.blosc': 'zarr.codecs.numcodecs.Blosc',
59+
'numcodecs.bz2': 'zarr.codecs.numcodecs.BZ2',
60+
'numcodecs.crc32': 'zarr.codecs.numcodecs.CRC32',
61+
'numcodecs.crc32c': 'zarr.codecs.numcodecs.CRC32C',
62+
'numcodecs.delta': 'zarr.codecs.numcodecs.Delta',
63+
'numcodecs.fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset',
64+
'numcodecs.fletcher32': 'zarr.codecs.numcodecs.Fletcher32',
65+
'numcodecs.gzip': 'zarr.codecs.numcodecs.GZip',
66+
'numcodecs.jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3',
67+
'numcodecs.lz4': 'zarr.codecs.numcodecs.LZ4',
68+
'numcodecs.lzma': 'zarr.codecs.numcodecs.LZMA',
69+
'numcodecs.packbits': 'zarr.codecs.numcodecs.PackBits',
70+
'numcodecs.pcodec': 'zarr.codecs.numcodecs.PCodec',
71+
'numcodecs.quantize': 'zarr.codecs.numcodecs.Quantize',
72+
'numcodecs.shuffle': 'zarr.codecs.numcodecs.Shuffle',
73+
'numcodecs.zfpy': 'zarr.codecs.numcodecs.ZFPY',
74+
'numcodecs.zlib': 'zarr.codecs.numcodecs.Zlib',
75+
'numcodecs.zstd': 'zarr.codecs.numcodecs.Zstd',
5676
'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec',
5777
'transpose': 'zarr.codecs.transpose.TransposeCodec',
5878
'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec',
5979
'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec',
6080
'zstd': 'zarr.codecs.zstd.ZstdCodec'},
61-
'default_zarr_format': 3,
62-
'json_indent': 2,
63-
'ndbuffer': 'zarr.buffer.cpu.NDBuffer',
64-
'threading': {'max_workers': None}}
81+
'default_zarr_format': 3,
82+
'json_indent': 2,
83+
'ndbuffer': 'zarr.buffer.cpu.NDBuffer',
84+
'threading': {'max_workers': None}}

src/zarr/codecs/__init__.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,34 @@
44
from zarr.codecs.bytes import BytesCodec, Endian
55
from zarr.codecs.crc32c_ import Crc32cCodec
66
from zarr.codecs.gzip import GzipCodec
7+
from zarr.codecs.numcodecs import (
8+
BZ2,
9+
CRC32,
10+
CRC32C,
11+
LZ4,
12+
LZMA,
13+
ZFPY,
14+
Adler32,
15+
AsType,
16+
BitRound,
17+
Blosc,
18+
Delta,
19+
FixedScaleOffset,
20+
Fletcher32,
21+
GZip,
22+
JenkinsLookup3,
23+
PackBits,
24+
PCodec,
25+
Quantize,
26+
Shuffle,
27+
Zlib,
28+
Zstd,
29+
)
730
from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
831
from zarr.codecs.transpose import TransposeCodec
932
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
1033
from zarr.codecs.zstd import ZstdCodec
34+
from zarr.registry import register_codec
1135

1236
__all__ = [
1337
"BloscCname",
@@ -24,3 +48,46 @@
2448
"VLenUTF8Codec",
2549
"ZstdCodec",
2650
]
51+
52+
register_codec("blosc", BloscCodec)
53+
register_codec("bytes", BytesCodec)
54+
55+
# compatibility with earlier versions of ZEP1
56+
register_codec("endian", BytesCodec)
57+
register_codec("crc32c", Crc32cCodec)
58+
register_codec("gzip", GzipCodec)
59+
register_codec("sharding_indexed", ShardingCodec)
60+
register_codec("zstd", ZstdCodec)
61+
register_codec("vlen-utf8", VLenUTF8Codec)
62+
register_codec("vlen-bytes", VLenBytesCodec)
63+
register_codec("transpose", TransposeCodec)
64+
65+
# Register all the codecs formerly contained in numcodecs.zarr3
66+
67+
register_codec("numcodecs.bz2", BZ2, qualname="zarr.codecs.numcodecs.BZ2")
68+
register_codec("numcodecs.crc32", CRC32, qualname="zarr.codecs.numcodecs.CRC32")
69+
register_codec("numcodecs.crc32c", CRC32C, qualname="zarr.codecs.numcodecs.CRC32C")
70+
register_codec("numcodecs.lz4", LZ4, qualname="zarr.codecs.numcodecs.LZ4")
71+
register_codec("numcodecs.lzma", LZMA, qualname="zarr.codecs.numcodecs.LZMA")
72+
register_codec("numcodecs.zfpy", ZFPY, qualname="zarr.codecs.numcodecs.ZFPY")
73+
register_codec("numcodecs.adler32", Adler32, qualname="zarr.codecs.numcodecs.Adler32")
74+
register_codec("numcodecs.astype", AsType, qualname="zarr.codecs.numcodecs.AsType")
75+
register_codec("numcodecs.bitround", BitRound, qualname="zarr.codecs.numcodecs.BitRound")
76+
register_codec("numcodecs.blosc", Blosc, qualname="zarr.codecs.numcodecs.Blosc")
77+
register_codec("numcodecs.delta", Delta, qualname="zarr.codecs.numcodecs.Delta")
78+
register_codec(
79+
"numcodecs.fixedscaleoffset",
80+
FixedScaleOffset,
81+
qualname="zarr.codecs.numcodecs.FixedScaleOffset",
82+
)
83+
register_codec("numcodecs.fletcher32", Fletcher32, qualname="zarr.codecs.numcodecs.Fletcher32")
84+
register_codec("numcodecs.gzip", GZip, qualname="zarr.codecs.numcodecs.GZip")
85+
register_codec(
86+
"numcodecs.jenkins_lookup3", JenkinsLookup3, qualname="zarr.codecs.numcodecs.JenkinsLookup3"
87+
)
88+
# qualname follows the same pattern as the sibling registrations: the module path
# plus the exact (case-sensitive) class name, here ``PCodec``.
register_codec("numcodecs.pcodec", PCodec, qualname="zarr.codecs.numcodecs.PCodec")
89+
register_codec("numcodecs.packbits", PackBits, qualname="zarr.codecs.numcodecs.PackBits")
90+
register_codec("numcodecs.quantize", Quantize, qualname="zarr.codecs.numcodecs.Quantize")
91+
register_codec("numcodecs.shuffle", Shuffle, qualname="zarr.codecs.numcodecs.Shuffle")
92+
register_codec("numcodecs.zlib", Zlib, qualname="zarr.codecs.numcodecs.Zlib")
93+
register_codec("numcodecs.zstd", Zstd, qualname="zarr.codecs.numcodecs.Zstd")

src/zarr/codecs/blosc.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from zarr.core.buffer.cpu import as_numpy_array_wrapper
1515
from zarr.core.common import JSON, parse_enum, parse_named_configuration
1616
from zarr.core.dtype.common import HasItemSize
17-
from zarr.registry import register_codec
1817

1918
if TYPE_CHECKING:
2019
from typing import Self
@@ -199,6 +198,3 @@ async def _encode_single(
199198

200199
def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
201200
raise NotImplementedError
202-
203-
204-
register_codec("blosc", BloscCodec)

src/zarr/codecs/bytes.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
1212
from zarr.core.common import JSON, parse_enum, parse_named_configuration
1313
from zarr.core.dtype.common import HasEndianness
14-
from zarr.registry import register_codec
1514

1615
if TYPE_CHECKING:
1716
from typing import Self
@@ -119,9 +118,3 @@ async def _encode_single(
119118

120119
def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
121120
return input_byte_length
122-
123-
124-
register_codec("bytes", BytesCodec)
125-
126-
# compatibility with earlier versions of ZEP1
127-
register_codec("endian", BytesCodec)

src/zarr/codecs/crc32c_.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
from zarr.abc.codec import BytesBytesCodec
1111
from zarr.core.common import JSON, parse_named_configuration
12-
from zarr.registry import register_codec
1312

1413
if TYPE_CHECKING:
1514
from typing import Self
@@ -65,6 +64,3 @@ async def _encode_single(
6564

6665
def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
6766
return input_byte_length + 4
68-
69-
70-
register_codec("crc32c", Crc32cCodec)

src/zarr/codecs/gzip.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from zarr.abc.codec import BytesBytesCodec
1010
from zarr.core.buffer.cpu import as_numpy_array_wrapper
1111
from zarr.core.common import JSON, parse_named_configuration
12-
from zarr.registry import register_codec
1312

1413
if TYPE_CHECKING:
1514
from typing import Self
@@ -73,6 +72,3 @@ def compute_encoded_size(
7372
_chunk_spec: ArraySpec,
7473
) -> int:
7574
raise NotImplementedError
76-
77-
78-
register_codec("gzip", GzipCodec)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from __future__ import annotations
2+
3+
from zarr.codecs.numcodecs._codecs import (
4+
BZ2,
5+
CRC32,
6+
CRC32C,
7+
LZ4,
8+
LZMA,
9+
ZFPY,
10+
Adler32,
11+
AsType,
12+
BitRound,
13+
Blosc,
14+
Delta,
15+
FixedScaleOffset,
16+
Fletcher32,
17+
GZip,
18+
JenkinsLookup3,
19+
PackBits,
20+
PCodec,
21+
Quantize,
22+
Shuffle,
23+
Zlib,
24+
Zstd,
25+
_NumcodecsArrayArrayCodec,
26+
_NumcodecsArrayBytesCodec,
27+
_NumcodecsBytesBytesCodec,
28+
_NumcodecsCodec,
29+
)
30+
31+
__all__ = [
32+
"BZ2",
33+
"CRC32",
34+
"CRC32C",
35+
"LZ4",
36+
"LZMA",
37+
"ZFPY",
38+
"Adler32",
39+
"AsType",
40+
"BitRound",
41+
"Blosc",
42+
"Delta",
43+
"FixedScaleOffset",
44+
"Fletcher32",
45+
"GZip",
46+
"JenkinsLookup3",
47+
"PCodec",
48+
"PackBits",
49+
"Quantize",
50+
"Shuffle",
51+
"Zlib",
52+
"Zstd",
53+
"_NumcodecsArrayArrayCodec",
54+
"_NumcodecsArrayBytesCodec",
55+
"_NumcodecsBytesBytesCodec",
56+
"_NumcodecsCodec",
57+
]

0 commit comments

Comments
 (0)