Skip to content

Use blosc2 package instead of bundled blosc #538

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,6 @@ jobs:
conda activate env
python -m pip list

- name: Flake8
shell: "bash -l {0}"
run: |
conda activate env
flake8

- name: Run tests
shell: "bash -l {0}"
run: |
Expand Down
1 change: 0 additions & 1 deletion c-blosc
Submodule c-blosc deleted from b886c1
7 changes: 3 additions & 4 deletions numcodecs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,12 @@
from numcodecs.blosc import Blosc
register_codec(Blosc)
# initialize blosc
import blosc2
try:
ncores = multiprocessing.cpu_count()
except OSError: # pragma: no cover
ncores = 1
blosc.init()
blosc.set_nthreads(min(8, ncores))
atexit.register(blosc.destroy)
blosc2.nthreads = min(8, ncores)

with suppress(ImportError):
from numcodecs import zstd
Expand Down Expand Up @@ -117,4 +116,4 @@
register_codec(Fletcher32)

from numcodecs.pcodec import PCodec
register_codec(PCodec)
register_codec(PCodec)
87 changes: 87 additions & 0 deletions numcodecs/blosc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from .abc import Codec
from .compat import ensure_contiguous_ndarray

import blosc2

# Shuffle modes accepted by the `shuffle` argument of `compress` and `Blosc`.
NOSHUFFLE = 0
SHUFFLE = 1
BITSHUFFLE = 2
AUTOSHUFFLE = -1
# Default value of the `blocksize` argument of `compress` and `Blosc`.
AUTOBLOCKS = 0

# blosc2 filters indexed by NOSHUFFLE / SHUFFLE / BITSHUFFLE. AUTOSHUFFLE (-1)
# has no entry of its own: indexing this list with it yields the last item.
_shuffles = [blosc2.Filter.NOFILTER, blosc2.Filter.SHUFFLE, blosc2.Filter.BITSHUFFLE]
# Display names used by `Blosc.__repr__`, indexed by `shuffle + 1` so that
# AUTOSHUFFLE (-1) maps to index 0.
_shuffle_repr = ['AUTOSHUFFLE', 'NOSHUFFLE', 'SHUFFLE', 'BITSHUFFLE']

# Expose `blosc2.get_cbuffer_sizes` under the name `cbuffer_sizes`.
cbuffer_sizes = blosc2.get_cbuffer_sizes

def list_compressors():
    """Return the lower-case names of the compressors reported by blosc2.

    Each entry is ``str(codec)`` lower-cased with the ``"codec."`` text
    removed, for every item returned by ``blosc2.compressor_list()``.
    """
    names = []
    for codec in blosc2.compressor_list():
        label = str(codec).lower()
        names.append(label.replace("codec.", ""))
    return names

def cbuffer_complib(source):
    """Return the name of the compression library used to compress `source`.

    The lookup is delegated to ``blosc2.get_clib``.
    """
    complib = blosc2.get_clib(source)
    return complib

def compress(source, cname: bytes, clevel, shuffle: int=SHUFFLE, blocksize=AUTOBLOCKS):
    """Compress `source` with blosc2 and return the compressed data.

    Parameters
    ----------
    source : bytes-like
        Data to be compressed.
    cname : bytes
        ASCII name of the compressor, e.g. ``b'lz4'``. A `str` is accepted
        as well.
    clevel : int
        Compression level passed through to ``blosc2.compress``.
    shuffle : int
        One of NOSHUFFLE (0), SHUFFLE (1), BITSHUFFLE (2) or AUTOSHUFFLE (-1).
        With AUTOSHUFFLE, bit-shuffle is used when the item size of `source`
        is 1 and byte-shuffle otherwise.
    blocksize : int
        Requested block size; AUTOBLOCKS (0) is the default.

    Raises
    ------
    ValueError
        If `shuffle` is not one of the four modes above, or if `cname` does
        not name an attribute of ``blosc2.Codec``.
    """
    if isinstance(cname, bytes):
        cname = cname.decode('ascii')

    # Resolve AUTOSHUFFLE explicitly: using -1 as a list index would silently
    # pick the last filter (BITSHUFFLE) regardless of the item size.
    if shuffle == AUTOSHUFFLE:
        # Objects without an `itemsize` attribute (e.g. bytes) count as size 1.
        itemsize = getattr(source, 'itemsize', 1)
        shuffle = BITSHUFFLE if itemsize == 1 else SHUFFLE
    elif shuffle not in (NOSHUFFLE, SHUFFLE, BITSHUFFLE):
        raise ValueError(
            'invalid shuffle argument; expected -1, 0, 1 or 2, found %r' % (shuffle,)
        )

    codec = getattr(blosc2.Codec, cname.upper(), None)
    if codec is None:
        raise ValueError('compressor %r is not supported' % (cname,))

    # The block size is not passed to blosc2.compress; it is set through the
    # module-level setter right before the call.
    # NOTE(review): this looks like process-wide state - confirm it is safe
    # when several threads compress with different block sizes.
    blosc2.set_blocksize(blocksize)
    return blosc2.compress(source, codec=codec, clevel=clevel, filter=_shuffles[shuffle])


class Blosc(Codec):
    """Codec providing compression using the Blosc meta-compressor.

    Parameters
    ----------
    cname : string, optional
        A string naming one of the compression algorithms available within blosc, e.g.,
        'zstd', 'blosclz', 'lz4', 'lz4hc', 'zlib' or 'snappy'.
    clevel : integer, optional
        An integer between 0 and 9 specifying the compression level.
    shuffle : integer, optional
        Either NOSHUFFLE (0), SHUFFLE (1), BITSHUFFLE (2) or AUTOSHUFFLE (-1). If AUTOSHUFFLE,
        bit-shuffle will be used for buffers with itemsize 1, and byte-shuffle will
        be used otherwise. The default is `SHUFFLE`.
    blocksize : int
        The requested size of the compressed blocks. If 0 (default), an automatic
        blocksize will be used.

    See Also
    --------
    numcodecs.zstd.Zstd, numcodecs.lz4.LZ4

    """

    codec_id = 'blosc'
    # Re-export the module-level shuffle constants as class attributes.
    NOSHUFFLE = NOSHUFFLE
    SHUFFLE = SHUFFLE
    BITSHUFFLE = BITSHUFFLE
    AUTOSHUFFLE = AUTOSHUFFLE
    # Size limit handed to ensure_contiguous_ndarray for input buffers.
    max_buffer_size = 2**31 - 1

    def __init__(self, cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=AUTOBLOCKS):
        """Store the compression settings; no validation is performed here."""
        self.cname = cname
        self.clevel = clevel
        self.shuffle = shuffle
        self.blocksize = blocksize

    def encode(self, buf):
        """Compress `buf` with the configured settings and return the result."""
        buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
        return compress(buf, bytes(self.cname, 'ascii'), self.clevel, self.shuffle, self.blocksize)

    def decode(self, buf, out=None):
        """Decompress `buf`.

        Parameters
        ----------
        buf : bytes-like
            Blosc-compressed data.
        out : writeable buffer, optional
            Destination for the decompressed data.

        Returns
        -------
        The decompressed data as returned by ``blosc2.decompress`` when `out`
        is None, otherwise `out` itself.
        """
        buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
        result = blosc2.decompress(buf, dst=out)
        # blosc2.decompress returns None when it writes into `dst`; hand the
        # filled destination back so callers always receive the decoded data.
        return result if out is None else out

    # NOTE(review): decode_partial is currently disabled. The commented-out
    # sketch below references a `decompress_partial` helper that this module
    # does not define.
    # def decode_partial(self, buf, int start, int nitems, out=None):
    #     '''**Experimental**'''
    #     buf = ensure_contiguous_ndarray(buf, self.max_buffer_size)
    #     return decompress_partial(buf, start, nitems, dest=out)

    def __repr__(self):
        """Return a constructor-style representation with the shuffle mode by name."""
        # `_shuffle_repr` is offset by one so that AUTOSHUFFLE (-1) is index 0.
        r = '%s(cname=%r, clevel=%r, shuffle=%s, blocksize=%s)' % \
            (type(self).__name__,
             self.cname,
             self.clevel,
             _shuffle_repr[self.shuffle + 1],
             self.blocksize)
        return r
Loading
Loading