Skip to content

chore/handle numcodecs codecs #3376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changes/3376.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Define Zarr V3-specific codecs from numcodecs inside this repo. These codecs can be found in
:mod:`zarr.codecs.numcodecs`. This is necessary to resolve a circular dependency between Zarr
and Numcodecs.
42 changes: 31 additions & 11 deletions docs/user-guide/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,43 @@ requires the value of ``codecs.bytes.name`` to be ``'custompackage.NewBytesCodec
This is the current default configuration::

>>> zarr.config.pprint()
{'array': {'order': 'C',
'write_empty_chunks': False},
'async': {'concurrency': 10, 'timeout': None},
'buffer': 'zarr.buffer.cpu.Buffer',
'codec_pipeline': {'batch_size': 1,
'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'},
'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec',
{'array': {'order': 'C', 'write_empty_chunks': False},
'async': {'concurrency': 10, 'timeout': None},
'buffer': 'zarr.buffer.cpu.Buffer',
'codec_pipeline': {'batch_size': 1,
'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'},
'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec',
'bytes': 'zarr.codecs.bytes.BytesCodec',
'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec',
'endian': 'zarr.codecs.bytes.BytesCodec',
'gzip': 'zarr.codecs.gzip.GzipCodec',
'numcodecs.adler32': 'zarr.codecs.numcodecs.Adler32',
'numcodecs.astype': 'zarr.codecs.numcodecs.AsType',
'numcodecs.bitround': 'zarr.codecs.numcodecs.BitRound',
'numcodecs.blosc': 'zarr.codecs.numcodecs.Blosc',
'numcodecs.bz2': 'zarr.codecs.numcodecs.BZ2',
'numcodecs.crc32': 'zarr.codecs.numcodecs.CRC32',
'numcodecs.crc32c': 'zarr.codecs.numcodecs.CRC32C',
'numcodecs.delta': 'zarr.codecs.numcodecs.Delta',
'numcodecs.fixedscaleoffset': 'zarr.codecs.numcodecs.FixedScaleOffset',
'numcodecs.fletcher32': 'zarr.codecs.numcodecs.Fletcher32',
'numcodecs.gzip': 'zarr.codecs.numcodecs.GZip',
'numcodecs.jenkins_lookup3': 'zarr.codecs.numcodecs.JenkinsLookup3',
'numcodecs.lz4': 'zarr.codecs.numcodecs.LZ4',
'numcodecs.lzma': 'zarr.codecs.numcodecs.LZMA',
'numcodecs.packbits': 'zarr.codecs.numcodecs.PackBits',
'numcodecs.pcodec': 'zarr.codecs.numcodecs.PCodec',
'numcodecs.quantize': 'zarr.codecs.numcodecs.Quantize',
'numcodecs.shuffle': 'zarr.codecs.numcodecs.Shuffle',
'numcodecs.zfpy': 'zarr.codecs.numcodecs.ZFPY',
'numcodecs.zlib': 'zarr.codecs.numcodecs.Zlib',
'numcodecs.zstd': 'zarr.codecs.numcodecs.Zstd',
'sharding_indexed': 'zarr.codecs.sharding.ShardingCodec',
'transpose': 'zarr.codecs.transpose.TransposeCodec',
'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec',
'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec',
'zstd': 'zarr.codecs.zstd.ZstdCodec'},
'default_zarr_format': 3,
'json_indent': 2,
'ndbuffer': 'zarr.buffer.cpu.NDBuffer',
'threading': {'max_workers': None}}
'default_zarr_format': 3,
'json_indent': 2,
'ndbuffer': 'zarr.buffer.cpu.NDBuffer',
'threading': {'max_workers': None}}
67 changes: 67 additions & 0 deletions src/zarr/codecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,34 @@
from zarr.codecs.bytes import BytesCodec, Endian
from zarr.codecs.crc32c_ import Crc32cCodec
from zarr.codecs.gzip import GzipCodec
from zarr.codecs.numcodecs import (
BZ2,
CRC32,
CRC32C,
LZ4,
LZMA,
ZFPY,
Adler32,
AsType,
BitRound,
Blosc,
Delta,
FixedScaleOffset,
Fletcher32,
GZip,
JenkinsLookup3,
PackBits,
PCodec,
Quantize,
Shuffle,
Zlib,
Zstd,
)
from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
from zarr.codecs.transpose import TransposeCodec
from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
from zarr.codecs.zstd import ZstdCodec
from zarr.registry import register_codec

__all__ = [
"BloscCname",
Expand All @@ -24,3 +48,46 @@
"VLenUTF8Codec",
"ZstdCodec",
]

# Register each built-in codec class under its codec name. Registration
# happens here, in the package ``__init__``, rather than in the individual
# codec modules.
for _codec_name, _codec_cls in (
    ("blosc", BloscCodec),
    ("bytes", BytesCodec),
    # compatibility with earlier versions of ZEP1
    ("endian", BytesCodec),
    ("crc32c", Crc32cCodec),
    ("gzip", GzipCodec),
    ("sharding_indexed", ShardingCodec),
    ("zstd", ZstdCodec),
    ("vlen-utf8", VLenUTF8Codec),
    ("vlen-bytes", VLenBytesCodec),
    ("transpose", TransposeCodec),
):
    register_codec(_codec_name, _codec_cls)
# Do not leave the loop variables behind as module attributes.
del _codec_name, _codec_cls

# Register all the codecs formerly contained in numcodecs.zarr3.
#
# Each codec is registered under its "numcodecs.<name>" key. The ``qualname``
# argument is the dotted path of the class inside ``zarr.codecs.numcodecs``
# and must match the class name exactly (including case), as it does in the
# default ``codecs`` configuration mapping.

register_codec("numcodecs.bz2", BZ2, qualname="zarr.codecs.numcodecs.BZ2")
register_codec("numcodecs.crc32", CRC32, qualname="zarr.codecs.numcodecs.CRC32")
register_codec("numcodecs.crc32c", CRC32C, qualname="zarr.codecs.numcodecs.CRC32C")
register_codec("numcodecs.lz4", LZ4, qualname="zarr.codecs.numcodecs.LZ4")
register_codec("numcodecs.lzma", LZMA, qualname="zarr.codecs.numcodecs.LZMA")
register_codec("numcodecs.zfpy", ZFPY, qualname="zarr.codecs.numcodecs.ZFPY")
register_codec("numcodecs.adler32", Adler32, qualname="zarr.codecs.numcodecs.Adler32")
register_codec("numcodecs.astype", AsType, qualname="zarr.codecs.numcodecs.AsType")
register_codec("numcodecs.bitround", BitRound, qualname="zarr.codecs.numcodecs.BitRound")
register_codec("numcodecs.blosc", Blosc, qualname="zarr.codecs.numcodecs.Blosc")
register_codec("numcodecs.delta", Delta, qualname="zarr.codecs.numcodecs.Delta")
register_codec(
    "numcodecs.fixedscaleoffset",
    FixedScaleOffset,
    qualname="zarr.codecs.numcodecs.FixedScaleOffset",
)
register_codec("numcodecs.fletcher32", Fletcher32, qualname="zarr.codecs.numcodecs.Fletcher32")
register_codec("numcodecs.gzip", GZip, qualname="zarr.codecs.numcodecs.GZip")
register_codec(
    "numcodecs.jenkins_lookup3", JenkinsLookup3, qualname="zarr.codecs.numcodecs.JenkinsLookup3"
)
# The class is ``PCodec``; a lowercase "pcodec" qualname would not name any
# attribute exported by ``zarr.codecs.numcodecs``.
register_codec("numcodecs.pcodec", PCodec, qualname="zarr.codecs.numcodecs.PCodec")
register_codec("numcodecs.packbits", PackBits, qualname="zarr.codecs.numcodecs.PackBits")
register_codec("numcodecs.quantize", Quantize, qualname="zarr.codecs.numcodecs.Quantize")
register_codec("numcodecs.shuffle", Shuffle, qualname="zarr.codecs.numcodecs.Shuffle")
register_codec("numcodecs.zlib", Zlib, qualname="zarr.codecs.numcodecs.Zlib")
register_codec("numcodecs.zstd", Zstd, qualname="zarr.codecs.numcodecs.Zstd")
4 changes: 0 additions & 4 deletions src/zarr/codecs/blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from zarr.core.buffer.cpu import as_numpy_array_wrapper
from zarr.core.common import JSON, parse_enum, parse_named_configuration
from zarr.core.dtype.common import HasItemSize
from zarr.registry import register_codec

if TYPE_CHECKING:
from typing import Self
Expand Down Expand Up @@ -199,6 +198,3 @@ async def _encode_single(

def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int:
raise NotImplementedError


register_codec("blosc", BloscCodec)
7 changes: 0 additions & 7 deletions src/zarr/codecs/bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer
from zarr.core.common import JSON, parse_enum, parse_named_configuration
from zarr.core.dtype.common import HasEndianness
from zarr.registry import register_codec

if TYPE_CHECKING:
from typing import Self
Expand Down Expand Up @@ -119,9 +118,3 @@ async def _encode_single(

def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
return input_byte_length


register_codec("bytes", BytesCodec)

# compatibility with earlier versions of ZEP1
register_codec("endian", BytesCodec)
4 changes: 0 additions & 4 deletions src/zarr/codecs/crc32c_.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from zarr.abc.codec import BytesBytesCodec
from zarr.core.common import JSON, parse_named_configuration
from zarr.registry import register_codec

if TYPE_CHECKING:
from typing import Self
Expand Down Expand Up @@ -65,6 +64,3 @@ async def _encode_single(

def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
return input_byte_length + 4


register_codec("crc32c", Crc32cCodec)
4 changes: 0 additions & 4 deletions src/zarr/codecs/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from zarr.abc.codec import BytesBytesCodec
from zarr.core.buffer.cpu import as_numpy_array_wrapper
from zarr.core.common import JSON, parse_named_configuration
from zarr.registry import register_codec

if TYPE_CHECKING:
from typing import Self
Expand Down Expand Up @@ -73,6 +72,3 @@ def compute_encoded_size(
_chunk_spec: ArraySpec,
) -> int:
raise NotImplementedError


register_codec("gzip", GzipCodec)
57 changes: 57 additions & 0 deletions src/zarr/codecs/numcodecs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from __future__ import annotations

from zarr.codecs.numcodecs._codecs import (
BZ2,
CRC32,
CRC32C,
LZ4,
LZMA,
ZFPY,
Adler32,
AsType,
BitRound,
Blosc,
Delta,
FixedScaleOffset,
Fletcher32,
GZip,
JenkinsLookup3,
PackBits,
PCodec,
Quantize,
Shuffle,
Zlib,
Zstd,
_NumcodecsArrayArrayCodec,
_NumcodecsArrayBytesCodec,
_NumcodecsBytesBytesCodec,
_NumcodecsCodec,
)

# Names exported by ``zarr.codecs.numcodecs``. Every entry is re-exported
# from ``zarr.codecs.numcodecs._codecs`` by the import at the top of this
# module.
__all__ = [
# Codec classes.
"BZ2",
"CRC32",
"CRC32C",
"LZ4",
"LZMA",
"ZFPY",
"Adler32",
"AsType",
"BitRound",
"Blosc",
"Delta",
"FixedScaleOffset",
"Fletcher32",
"GZip",
"JenkinsLookup3",
"PCodec",
"PackBits",
"Quantize",
"Shuffle",
"Zlib",
"Zstd",
# Underscore-prefixed names are exported as well.
# NOTE(review): presumably these are the shared base classes for the codecs
# above — confirm against ``_codecs``.
"_NumcodecsArrayArrayCodec",
"_NumcodecsArrayBytesCodec",
"_NumcodecsBytesBytesCodec",
"_NumcodecsCodec",
]
Loading
Loading