Skip to content

Commit 5b12b15

Browse files
authored
Adds checksum flag to zstd codec (#519)
* expose checksum toggle for zstd
* fixes zstd checksumming
* less fixtures
* write_checksum -> checksum
* adds release notes
* set default clevel to 0
* release
* update fixtures
* fix checksum flag
* add test for checksum
* adds wrapper codecs for the v2 codec pipeline
* docstring
1 parent bef2e16 commit 5b12b15

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+108
-23
lines changed

docs/release.rst

Lines changed: 3 additions & 2 deletions

fixture/zstd/codec.00/config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
22
"id": "zstd",
3-
"level": 1
3+
"level": 0
44
}

fixture/zstd/codec.07/config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"checksum": true,
3+
"id": "zstd",
4+
"level": 0
5+
}

fixture/zstd/codec.07/encoded.00.dat

1.87 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.01.dat

4.9 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.02.dat

6.83 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.03.dat

264 Bytes
Binary file not shown.

fixture/zstd/codec.07/encoded.04.dat

475 Bytes
Binary file not shown.

fixture/zstd/codec.07/encoded.05.dat

7.83 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.06.dat

7.83 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.07.dat

3.83 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.08.dat

3.83 KB
Binary file not shown.

fixture/zstd/codec.07/encoded.09.dat

651 Bytes
Binary file not shown.

fixture/zstd/codec.07/encoded.10.dat

652 Bytes
Binary file not shown.

fixture/zstd/codec.07/encoded.11.dat

659 Bytes
Binary file not shown.

fixture/zstd/codec.07/encoded.12.dat

657 Bytes
Binary file not shown.

fixture/zstd/codec.08/config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"checksum": true,
3+
"id": "zstd",
4+
"level": 0
5+
}

fixture/zstd/codec.08/encoded.00.dat

1.87 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.01.dat

4.9 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.02.dat

6.83 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.03.dat

264 Bytes
Binary file not shown.

fixture/zstd/codec.08/encoded.04.dat

475 Bytes
Binary file not shown.

fixture/zstd/codec.08/encoded.05.dat

7.83 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.06.dat

7.83 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.07.dat

3.83 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.08.dat

3.83 KB
Binary file not shown.

fixture/zstd/codec.08/encoded.09.dat

651 Bytes
Binary file not shown.

fixture/zstd/codec.08/encoded.10.dat

652 Bytes
Binary file not shown.

fixture/zstd/codec.08/encoded.11.dat

659 Bytes
Binary file not shown.

fixture/zstd/codec.08/encoded.12.dat

657 Bytes
Binary file not shown.

fixture/zstd/codec.09/config.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"checksum": true,
3+
"id": "zstd",
4+
"level": 22
5+
}

fixture/zstd/codec.09/encoded.00.dat

1.02 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.01.dat

3.94 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.02.dat

6.26 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.03.dat

161 Bytes
Binary file not shown.

fixture/zstd/codec.09/encoded.04.dat

365 Bytes
Binary file not shown.

fixture/zstd/codec.09/encoded.05.dat

7.77 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.06.dat

7.78 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.07.dat

3.29 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.08.dat

3.31 KB
Binary file not shown.

fixture/zstd/codec.09/encoded.09.dat

850 Bytes
Binary file not shown.

fixture/zstd/codec.09/encoded.10.dat

854 Bytes
Binary file not shown.

fixture/zstd/codec.09/encoded.11.dat

849 Bytes
Binary file not shown.

fixture/zstd/codec.09/encoded.12.dat

849 Bytes
Binary file not shown.

numcodecs/tests/test_zstd.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,18 @@
2929
Zstd(level=10),
3030
Zstd(level=22),
3131
Zstd(level=100),
32+
Zstd(checksum=True),
33+
Zstd(level=0, checksum=True),
34+
Zstd(level=22, checksum=True),
3235
]
3336

3437

3538
# mix of dtypes: integer, float, bool, string
3639
# mix of shapes: 1D, 2D, 3D
3740
# mix of orders: C, F
3841
arrays = [
39-
np.arange(1000, dtype='i4'),
40-
np.linspace(1000, 1001, 1000, dtype='f8'),
42+
np.arange(1000, dtype="i4"),
43+
np.linspace(1000, 1001, 1000, dtype="f8"),
4144
np.random.normal(loc=1000, scale=1, size=(100, 10)),
4245
np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
4346
np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
@@ -76,3 +79,17 @@ def test_err_decode_object_buffer():
7679

7780
def test_err_encode_object_buffer():
7881
check_err_encode_object_buffer(Zstd())
82+
83+
84+
def test_checksum():
85+
data = np.arange(0, 64, dtype="uint8")
86+
assert len(Zstd(level=0, checksum=False).encode(data)) + 4 == len(
87+
Zstd(level=0, checksum=True).encode(data)
88+
)
89+
90+
91+
def test_native_functions():
92+
# Note, these assertions might need to be changed for new versions of zstd
93+
assert Zstd.default_level == 3
94+
assert Zstd.min_level == -131072
95+
assert Zstd.max_level == 22

numcodecs/zstd.pyx

Lines changed: 70 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,38 @@ cdef extern from "zstd.h":
1919

2020
unsigned ZSTD_versionNumber() nogil
2121

22-
size_t ZSTD_compress(void* dst,
23-
size_t dstCapacity,
24-
const void* src,
25-
size_t srcSize,
26-
int compressionLevel) nogil
22+
struct ZSTD_CCtx_s:
23+
pass
24+
ctypedef ZSTD_CCtx_s ZSTD_CCtx
25+
cdef enum ZSTD_cParameter:
26+
ZSTD_c_compressionLevel=100
27+
ZSTD_c_checksumFlag=201
28+
29+
ZSTD_CCtx* ZSTD_createCCtx() nogil
30+
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) nogil
31+
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx,
32+
ZSTD_cParameter param,
33+
int value) nogil
34+
35+
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
36+
void* dst,
37+
size_t dstCapacity,
38+
const void* src,
39+
size_t srcSize) nogil
2740

2841
size_t ZSTD_decompress(void* dst,
2942
size_t dstCapacity,
3043
const void* src,
3144
size_t compressedSize) nogil
3245

33-
unsigned long long ZSTD_getDecompressedSize(const void* src,
46+
cdef long ZSTD_CONTENTSIZE_UNKNOWN
47+
cdef long ZSTD_CONTENTSIZE_ERROR
48+
unsigned long long ZSTD_getFrameContentSize(const void* src,
3449
size_t srcSize) nogil
3550

51+
int ZSTD_minCLevel() nogil
3652
int ZSTD_maxCLevel() nogil
53+
int ZSTD_defaultCLevel() nogil
3754

3855
size_t ZSTD_compressBound(size_t srcSize) nogil
3956

@@ -51,11 +68,11 @@ MICRO_VERSION_NUMBER = (
5168
(MINOR_VERSION_NUMBER * 100)
5269
)
5370
__version__ = '%s.%s.%s' % (MAJOR_VERSION_NUMBER, MINOR_VERSION_NUMBER, MICRO_VERSION_NUMBER)
54-
DEFAULT_CLEVEL = 1
71+
DEFAULT_CLEVEL = 0
5572
MAX_CLEVEL = ZSTD_maxCLevel()
5673

5774

58-
def compress(source, int level=DEFAULT_CLEVEL):
75+
def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False):
5976
"""Compress data.
6077
6178
Parameters
@@ -64,7 +81,9 @@ def compress(source, int level=DEFAULT_CLEVEL):
6481
Data to be compressed. Can be any object supporting the buffer
6582
protocol.
6683
level : int
67-
Compression level (1-22).
84+
Compression level (-131072 to 22).
85+
checksum : bool
86+
Flag to enable checksums. The default is False.
6887
6988
Returns
7089
-------
@@ -80,8 +99,6 @@ def compress(source, int level=DEFAULT_CLEVEL):
8099
bytes dest
81100

82101
# check level
83-
if level <= 0:
84-
level = DEFAULT_CLEVEL
85102
if level > MAX_CLEVEL:
86103
level = MAX_CLEVEL
87104

@@ -90,6 +107,19 @@ def compress(source, int level=DEFAULT_CLEVEL):
90107
source_ptr = source_buffer.ptr
91108
source_size = source_buffer.nbytes
92109

110+
cctx = ZSTD_createCCtx()
111+
param_set_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level)
112+
113+
if ZSTD_isError(param_set_result):
114+
error = ZSTD_getErrorName(param_set_result)
115+
raise RuntimeError('Could not set zstd compression level: %s' % error)
116+
117+
param_set_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1 if checksum else 0)
118+
119+
if ZSTD_isError(param_set_result):
120+
error = ZSTD_getErrorName(param_set_result)
121+
raise RuntimeError('Could not set zstd checksum flag: %s' % error)
122+
93123
try:
94124

95125
# setup destination
@@ -99,10 +129,11 @@ def compress(source, int level=DEFAULT_CLEVEL):
99129

100130
# perform compression
101131
with nogil:
102-
compressed_size = ZSTD_compress(dest_ptr, dest_size, source_ptr, source_size, level)
132+
compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size)
103133

104134
finally:
105-
135+
if cctx:
136+
ZSTD_freeCCtx(cctx)
106137
# release buffers
107138
source_buffer.release()
108139

@@ -148,8 +179,8 @@ def decompress(source, dest=None):
148179
try:
149180

150181
# determine uncompressed size
151-
dest_size = ZSTD_getDecompressedSize(source_ptr, source_size)
152-
if dest_size == 0:
182+
dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
183+
if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR:
153184
raise RuntimeError('Zstd decompression error: invalid input data')
154185

155186
# setup destination buffer
@@ -193,7 +224,9 @@ class Zstd(Codec):
193224
Parameters
194225
----------
195226
level : int
196-
Compression level (1-22).
227+
Compression level (-131072 to 22).
228+
checksum : bool
229+
Flag to enable checksums. The default is False.
197230
198231
See Also
199232
--------
@@ -207,12 +240,13 @@ class Zstd(Codec):
207240
# practical limit on the size of buffers that Zstd can process and so we don't
208241
# enforce a max_buffer_size option here.
209242

210-
def __init__(self, level=DEFAULT_CLEVEL):
243+
def __init__(self, level=DEFAULT_CLEVEL, checksum=False):
211244
self.level = level
245+
self.checksum = checksum
212246

213247
def encode(self, buf):
214248
buf = ensure_contiguous_ndarray(buf)
215-
return compress(buf, self.level)
249+
return compress(buf, self.level, self.checksum)
216250

217251
def decode(self, buf, out=None):
218252
buf = ensure_contiguous_ndarray(buf)
@@ -223,3 +257,21 @@ class Zstd(Codec):
223257
(type(self).__name__,
224258
self.level)
225259
return r
260+
261+
@classmethod
262+
@property
263+
def default_level(cls):
264+
"""Returns the default compression level of the underlying zstd library."""
265+
return ZSTD_defaultCLevel()
266+
267+
@classmethod
268+
@property
269+
def min_level(cls):
270+
"""Returns the minimum compression level of the underlying zstd library."""
271+
return ZSTD_minCLevel()
272+
273+
@classmethod
274+
@property
275+
def max_level(cls):
276+
"""Returns the maximum compression level of the underlying zstd library."""
277+
return ZSTD_maxCLevel()

0 commit comments

Comments
 (0)