Skip to content

Commit bea4b32

Browse files
authored
Merge pull request #98 from alimanfoo/msgpack-alimanfoo-20181106
Fix msgpack issues and warnings
2 parents 264aa2d + 802424a commit bea4b32

File tree

9 files changed

+150
-31
lines changed

9 files changed

+150
-31
lines changed

fixture/msgpack2/array.08.npy

2.13 KB
Binary file not shown.

fixture/msgpack2/array.09.npy

3.16 KB
Binary file not shown.

fixture/msgpack2/codec.00/config.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
{
2-
"encoding": "utf-8",
3-
"id": "msgpack2"
2+
"id": "msgpack2",
3+
"raw": false,
4+
"use_bin_type": true,
5+
"use_single_float": false
46
}
-100 Bytes
Binary file not shown.
-100 Bytes
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
���foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�b
ar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�foo�bar�baz�|O���

fixture/msgpack2/codec.00/encoded.09.dat

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

numcodecs/msgpacks.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@ class MsgPack(Codec):
1818
The encoding format has been changed to include the array shape in the encoded
1919
data, which ensures that all object arrays can be correctly encoded and decoded.
2020
21+
Parameters
22+
----------
23+
use_single_float : bool, optional
24+
Pack Python floats as single precision (32-bit) floats instead of double precision.
25+
use_bin_type : bool, optional
26+
Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type
27+
for unicode.
28+
raw : bool, optional
29+
If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str
30+
(or unicode on Python 2) by decoding with UTF-8 encoding.
31+
2132
Examples
2233
--------
2334
>>> import numcodecs
@@ -39,19 +50,22 @@ class MsgPack(Codec):
3950

4051
codec_id = 'msgpack2'
4152

42-
def __init__(self, encoding='utf-8'):
43-
self.encoding = encoding
53+
def __init__(self, use_single_float=False, use_bin_type=True, raw=False):
54+
self.use_single_float = use_single_float
55+
self.use_bin_type = use_bin_type
56+
self.raw = raw
4457

4558
def encode(self, buf):
4659
buf = np.asanyarray(buf)
4760
items = buf.tolist()
4861
items.append(buf.dtype.str)
4962
items.append(buf.shape)
50-
return msgpack.packb(items, encoding=self.encoding)
63+
return msgpack.packb(items, use_bin_type=self.use_bin_type,
64+
use_single_float=self.use_single_float)
5165

5266
def decode(self, buf, out=None):
5367
buf = buffer_tobytes(buf)
54-
items = msgpack.unpackb(buf, encoding=self.encoding)
68+
items = msgpack.unpackb(buf, raw=self.raw)
5569
dec = np.empty(items[-1], dtype=items[-2])
5670
dec[:] = items[:-2]
5771
if out is not None:
@@ -62,13 +76,18 @@ def decode(self, buf, out=None):
6276

6377
def get_config(self):
6478
return dict(id=self.codec_id,
65-
encoding=self.encoding)
79+
raw=self.raw,
80+
use_single_float=self.use_single_float,
81+
use_bin_type=self.use_bin_type)
6682

6783
def __repr__(self):
68-
return 'MsgPack(encoding=%r)' % self.encoding
84+
return (
85+
'MsgPack(raw={!r}, use_bin_type={!r}, use_single_float={!r})'
86+
.format(self.raw, self.use_bin_type, self.use_single_float)
87+
)
6988

7089

71-
class LegacyMsgPack(MsgPack):
90+
class LegacyMsgPack(Codec):
7291
"""Deprecated MsgPack codec.
7392
7493
.. deprecated:: 0.6.0
@@ -82,6 +101,9 @@ class LegacyMsgPack(MsgPack):
82101

83102
codec_id = 'msgpack'
84103

104+
def __init__(self, encoding='utf-8'):
105+
self.encoding = encoding
106+
85107
def encode(self, buf):
86108
buf = np.asanyarray(buf)
87109
items = buf.tolist()
@@ -98,5 +120,9 @@ def decode(self, buf, out=None):
98120
else:
99121
return dec
100122

123+
def get_config(self):
124+
return dict(id=self.codec_id,
125+
encoding=self.encoding)
126+
101127
def __repr__(self):
102128
return 'LegacyMsgPack(encoding=%r)' % self.encoding

numcodecs/tests/test_msgpacks.py

Lines changed: 111 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,78 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, division
33
import unittest
4-
import itertools
4+
import warnings
55

66

77
import numpy as np
88

99

1010
try:
1111
from numcodecs.msgpacks import LegacyMsgPack, MsgPack
12-
codecs = [LegacyMsgPack(), MsgPack()]
12+
default_codec = MsgPack()
13+
# N.B., the legacy codec is broken, see tests below. Also, the legacy codec generates
14+
# PendingDeprecationWarning due to use of encoding argument, which we ignore here
15+
# as not relevant.
16+
legacy_codec = LegacyMsgPack()
1317
except ImportError: # pragma: no cover
1418
raise unittest.SkipTest("msgpack not available")
1519

1620

1721
from numcodecs.tests.common import (check_config, check_repr, check_encode_decode_array,
1822
check_backwards_compatibility, greetings)
23+
from numcodecs.compat import text_type, binary_type, PY2
1924

2025

2126
# object array with strings
2227
# object array with mix of strings / nans
2328
# object array with mix of string, int, float
2429
# ...
2530
arrays = [
26-
np.array(['foo', 'bar', 'baz'] * 300, dtype=object),
27-
np.array([['foo', 'bar', np.nan]] * 300, dtype=object),
28-
np.array(['foo', 1.0, 2] * 300, dtype=object),
31+
np.array([u'foo', u'bar', u'baz'] * 300, dtype=object),
32+
np.array([[u'foo', u'bar', np.nan]] * 300, dtype=object),
33+
np.array([u'foo', 1.0, 2] * 300, dtype=object),
2934
np.arange(1000, dtype='i4'),
30-
np.array(['foo', 'bar', 'baz'] * 300),
31-
np.array(['foo', ['bar', 1.0, 2], {'a': 'b', 'c': 42}] * 300, dtype=object),
35+
np.array([u'foo', u'bar', u'baz'] * 300),
36+
np.array([u'foo', [u'bar', 1.0, 2], {u'a': u'b', u'c': 42}] * 300, dtype=object),
3237
np.array(greetings * 100),
3338
np.array(greetings * 100, dtype=object),
39+
np.array([b'foo', b'bar', b'baz'] * 300, dtype=object),
40+
np.array([g.encode('utf-8') for g in greetings] * 100, dtype=object),
3441
]
3542

3643

44+
legacy_arrays = arrays[:8]
45+
46+
3747
def test_encode_decode():
38-
for arr, codec in itertools.product(arrays, codecs):
39-
check_encode_decode_array(arr, codec)
48+
49+
for arr in arrays:
50+
check_encode_decode_array(arr, default_codec)
51+
52+
with warnings.catch_warnings():
53+
warnings.simplefilter('ignore', PendingDeprecationWarning)
54+
for arr in legacy_arrays:
55+
check_encode_decode_array(arr, legacy_codec)
4056

4157

4258
def test_config():
43-
for codec in codecs:
59+
for codec in [default_codec, legacy_codec]:
4460
check_config(codec)
4561

4662

4763
def test_repr():
48-
check_repr("MsgPack(encoding='utf-8')")
49-
check_repr("MsgPack(encoding='ascii')")
64+
check_repr("MsgPack(raw=False, use_bin_type=True, use_single_float=False)")
65+
check_repr("MsgPack(raw=True, use_bin_type=False, use_single_float=True)")
5066
check_repr("LegacyMsgPack(encoding='utf-8')")
5167
check_repr("LegacyMsgPack(encoding='ascii')")
5268

5369

5470
def test_backwards_compatibility():
55-
for codec in codecs:
56-
check_backwards_compatibility(codec.codec_id, arrays, [codec])
71+
check_backwards_compatibility(default_codec.codec_id, arrays, [default_codec])
72+
with warnings.catch_warnings():
73+
warnings.simplefilter('ignore', PendingDeprecationWarning)
74+
check_backwards_compatibility(legacy_codec.codec_id, legacy_arrays,
75+
[legacy_codec])
5776

5877

5978
def test_non_numpy_inputs():
@@ -64,16 +83,21 @@ def test_non_numpy_inputs():
6483
[[0, 1], [2, 3]],
6584
[[0], [1], [2, 3]],
6685
[[[0, 0]], [[1, 1]], [[2, 3]]],
67-
["1"],
68-
["11", "11"],
69-
["11", "1", "1"],
86+
[u"1"],
87+
[u"11", u"11"],
88+
[u"11", u"1", u"1"],
7089
[{}],
71-
[{"key": "value"}, ["list", "of", "strings"]],
90+
[{u"key": u"value"}, [u"list", u"of", u"strings"]],
91+
[b"1"],
92+
[b"11", b"11"],
93+
[b"11", b"1", b"1"],
94+
[{b"key": b"value"}, [b"list", b"of", b"strings"]],
7295
]
7396
for input_data in data:
74-
for codec in codecs:
75-
output_data = codec.decode(codec.encode(input_data))
76-
assert np.array_equal(np.array(input_data), output_data)
97+
actual = default_codec.decode(default_codec.encode(input_data))
98+
expect = np.array(input_data)
99+
assert expect.shape == actual.shape
100+
assert np.array_equal(expect, actual)
77101

78102

79103
def test_legacy_codec_broken():
@@ -85,7 +109,9 @@ def test_legacy_codec_broken():
85109
a[0] = [0, 1]
86110
a[1] = [2, 3]
87111
codec = LegacyMsgPack()
88-
b = codec.decode(codec.encode(a))
112+
with warnings.catch_warnings():
113+
warnings.simplefilter('ignore', PendingDeprecationWarning)
114+
b = codec.decode(codec.encode(a))
89115
assert a.shape == (2,)
90116
assert b.shape == (2, 2)
91117
assert not np.array_equal(a, b)
@@ -94,3 +120,66 @@ def test_legacy_codec_broken():
94120
codec = MsgPack()
95121
b = codec.decode(codec.encode(a))
96122
assert np.array_equal(a, b)
123+
assert a.shape == b.shape
124+
125+
126+
def test_encode_decode_shape_dtype_preserved():
127+
for arr in arrays:
128+
actual = default_codec.decode(default_codec.encode(arr))
129+
assert arr.shape == actual.shape
130+
assert arr.dtype == actual.dtype
131+
132+
133+
def test_bytes():
134+
# test msgpack behaviour with bytes and str (unicode)
135+
bytes_arr = np.array([b'foo', b'bar', b'baz'], dtype=object)
136+
unicode_arr = np.array([u'foo', u'bar', u'baz'], dtype=object)
137+
138+
# raw=False (default)
139+
codec = MsgPack()
140+
# works for bytes array, round-trips bytes to bytes
141+
b = codec.decode(codec.encode(bytes_arr))
142+
assert np.array_equal(bytes_arr, b)
143+
assert isinstance(b[0], binary_type)
144+
assert b[0] == b'foo'
145+
# works for unicode array, round-trips unicode to unicode
146+
b = codec.decode(codec.encode(unicode_arr))
147+
assert np.array_equal(unicode_arr, b)
148+
assert isinstance(b[0], text_type)
149+
assert b[0] == u'foo'
150+
151+
# raw=True
152+
codec = MsgPack(raw=True)
153+
# works for bytes array, round-trips bytes to bytes
154+
b = codec.decode(codec.encode(bytes_arr))
155+
assert np.array_equal(bytes_arr, b)
156+
assert isinstance(b[0], binary_type)
157+
assert b[0] == b'foo'
158+
# broken for unicode array, round-trips unicode to bytes
159+
b = codec.decode(codec.encode(unicode_arr))
160+
if PY2:
161+
# PY2 considers b'foo' and u'foo' to be equal
162+
assert np.array_equal(unicode_arr, b)
163+
else:
164+
assert not np.array_equal(unicode_arr, b)
165+
assert isinstance(b[0], binary_type)
166+
assert b[0] == b'foo'
167+
168+
# legacy codec
169+
codec = LegacyMsgPack()
170+
with warnings.catch_warnings():
171+
warnings.simplefilter('ignore', PendingDeprecationWarning)
172+
# broken for bytes array, round-trips bytes to unicode
173+
b = codec.decode(codec.encode(bytes_arr))
174+
if PY2:
175+
# PY2 considers b'foo' and u'foo' to be equal
176+
assert np.array_equal(unicode_arr, b)
177+
else:
178+
assert not np.array_equal(bytes_arr, b)
179+
assert isinstance(b[0], text_type)
180+
assert b[0] == u'foo'
181+
# works for unicode array, round-trips unicode to unicode
182+
b = codec.decode(codec.encode(unicode_arr))
183+
assert np.array_equal(unicode_arr, b)
184+
assert isinstance(b[0], text_type)
185+
assert b[0] == u'foo'

0 commit comments

Comments
 (0)