1
1
# -*- coding: utf-8 -*-
2
2
from __future__ import absolute_import , print_function , division
3
3
import unittest
4
- import itertools
4
+ import warnings
5
5
6
6
7
7
import numpy as np
8
8
9
9
10
10
try:
    from numcodecs.msgpacks import LegacyMsgPack, MsgPack
    # Codec instance used by most of the tests in this module.
    default_codec = MsgPack()
    # N.B., legacy codec is broken, see tests below. Also legacy code generates
    # PendingDeprecationWarning due to use of encoding argument, which we ignore here
    # as not relevant.
    legacy_codec = LegacyMsgPack()
except ImportError:  # pragma: no cover
    # Skip the whole module when the msgpack-backed codecs cannot be imported.
    raise unittest.SkipTest("msgpack not available")
15
19
16
20
17
21
from numcodecs .tests .common import (check_config , check_repr , check_encode_decode_array ,
18
22
check_backwards_compatibility , greetings )
23
+ from numcodecs .compat import text_type , binary_type , PY2
19
24
20
25
21
26
# Fixture arrays shared by the tests in this module:
# object array with strings
# object array with mix strings / nans
# object array with mix of string, int, float
# ...
arrays = [
    np.array([u'foo', u'bar', u'baz'] * 300, dtype=object),
    np.array([[u'foo', u'bar', np.nan]] * 300, dtype=object),
    np.array([u'foo', 1.0, 2] * 300, dtype=object),
    np.arange(1000, dtype='i4'),
    np.array([u'foo', u'bar', u'baz'] * 300),
    np.array([u'foo', [u'bar', 1.0, 2], {u'a': u'b', u'c': 42}] * 300, dtype=object),
    np.array(greetings * 100),
    np.array(greetings * 100, dtype=object),
    # bytes fixtures; keep these last, the legacy slice below relies on it
    np.array([b'foo', b'bar', b'baz'] * 300, dtype=object),
    np.array([g.encode('utf-8') for g in greetings] * 100, dtype=object),
]


# The legacy codec is only exercised on the first eight fixtures, i.e.
# everything except the two trailing bytes arrays (test_bytes in this module
# shows the legacy codec returning unicode for bytes input).
legacy_arrays = arrays[:8]
45
+
46
+
37
47
def test_encode_decode():
    """Run the shared encode/decode check over the fixture arrays.

    The default codec is checked against every array in ``arrays``; the
    legacy codec only against ``legacy_arrays``.
    """
    for arr in arrays:
        check_encode_decode_array(arr, default_codec)

    # The legacy codec emits PendingDeprecationWarning (see the note where
    # legacy_codec is created); silence it for the duration of these checks.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PendingDeprecationWarning)
        for arr in legacy_arrays:
            check_encode_decode_array(arr, legacy_codec)
40
56
41
57
42
58
def test_config():
    """Run the shared config check for both the default and legacy codec."""
    for codec in [default_codec, legacy_codec]:
        check_config(codec)
45
61
46
62
47
63
def test_repr():
    """Run the shared repr check on constructor expressions for both codecs.

    The strings are passed verbatim to ``check_repr``, so they must not
    contain stray whitespace.
    """
    # Current codec: default options, then every option flipped.
    check_repr("MsgPack(raw=False, use_bin_type=True, use_single_float=False)")
    check_repr("MsgPack(raw=True, use_bin_type=False, use_single_float=True)")
    # Legacy codec still takes the encoding argument.
    check_repr("LegacyMsgPack(encoding='utf-8')")
    check_repr("LegacyMsgPack(encoding='ascii')")
52
68
53
69
54
70
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for each codec.

    The default codec is checked with all fixture arrays, the legacy codec
    only with ``legacy_arrays``.
    """
    check_backwards_compatibility(default_codec.codec_id, arrays, [default_codec])

    # Ignore the PendingDeprecationWarning raised by the legacy codec.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PendingDeprecationWarning)
        check_backwards_compatibility(legacy_codec.codec_id, legacy_arrays,
                                      [legacy_codec])
57
76
58
77
59
78
def test_non_numpy_inputs ():
@@ -64,16 +83,21 @@ def test_non_numpy_inputs():
64
83
[[0 , 1 ], [2 , 3 ]],
65
84
[[0 ], [1 ], [2 , 3 ]],
66
85
[[[0 , 0 ]], [[1 , 1 ]], [[2 , 3 ]]],
67
- ["1" ],
68
- ["11" , "11" ],
69
- ["11" , "1" , "1" ],
86
+ [u "1" ],
87
+ [u "11" , u "11" ],
88
+ [u "11" , u "1" , u "1" ],
70
89
[{}],
71
- [{"key" : "value" }, ["list" , "of" , "strings" ]],
90
+ [{u"key" : u"value" }, [u"list" , u"of" , u"strings" ]],
91
+ [b"1" ],
92
+ [b"11" , b"11" ],
93
+ [b"11" , b"1" , b"1" ],
94
+ [{b"key" : b"value" }, [b"list" , b"of" , b"strings" ]],
72
95
]
73
96
for input_data in data :
74
- for codec in codecs :
75
- output_data = codec .decode (codec .encode (input_data ))
76
- assert np .array_equal (np .array (input_data ), output_data )
97
+ actual = default_codec .decode (default_codec .encode (input_data ))
98
+ expect = np .array (input_data )
99
+ assert expect .shape == actual .shape
100
+ assert np .array_equal (expect , actual )
77
101
78
102
79
103
def test_legacy_codec_broken ():
@@ -85,7 +109,9 @@ def test_legacy_codec_broken():
85
109
a [0 ] = [0 , 1 ]
86
110
a [1 ] = [2 , 3 ]
87
111
codec = LegacyMsgPack ()
88
- b = codec .decode (codec .encode (a ))
112
+ with warnings .catch_warnings ():
113
+ warnings .simplefilter ('ignore' , PendingDeprecationWarning )
114
+ b = codec .decode (codec .encode (a ))
89
115
assert a .shape == (2 ,)
90
116
assert b .shape == (2 , 2 )
91
117
assert not np .array_equal (a , b )
@@ -94,3 +120,66 @@ def test_legacy_codec_broken():
94
120
codec = MsgPack ()
95
121
b = codec .decode (codec .encode (a ))
96
122
assert np .array_equal (a , b )
123
+ assert a .shape == b .shape
124
+
125
+
126
def test_encode_decode_shape_dtype_preserved():
    """Check that a default-codec round trip keeps each array's shape and dtype."""
    for arr in arrays:
        actual = default_codec.decode(default_codec.encode(arr))
        assert arr.shape == actual.shape
        assert arr.dtype == actual.dtype
131
+
132
+
133
def test_bytes():
    """Pin how each codec configuration round-trips bytes and unicode items.

    Covers ``MsgPack()`` (raw=False), ``MsgPack(raw=True)`` and
    ``LegacyMsgPack()``, each against an object array of bytes and an object
    array of unicode strings.
    """
    # test msgpack behaviour with bytes and str (unicode)
    bytes_arr = np.array([b'foo', b'bar', b'baz'], dtype=object)
    unicode_arr = np.array([u'foo', u'bar', u'baz'], dtype=object)

    # raw=False (default)
    codec = MsgPack()
    # works for bytes array, round-trips bytes to bytes
    b = codec.decode(codec.encode(bytes_arr))
    assert np.array_equal(bytes_arr, b)
    assert isinstance(b[0], binary_type)
    assert b[0] == b'foo'
    # works for unicode array, round-trips unicode to unicode
    b = codec.decode(codec.encode(unicode_arr))
    assert np.array_equal(unicode_arr, b)
    assert isinstance(b[0], text_type)
    assert b[0] == u'foo'

    # raw=True
    codec = MsgPack(raw=True)
    # works for bytes array, round-trips bytes to bytes
    b = codec.decode(codec.encode(bytes_arr))
    assert np.array_equal(bytes_arr, b)
    assert isinstance(b[0], binary_type)
    assert b[0] == b'foo'
    # broken for unicode array, round-trips unicode to bytes
    b = codec.decode(codec.encode(unicode_arr))
    if PY2:
        # PY2 considers b'foo' and u'foo' to be equal
        assert np.array_equal(unicode_arr, b)
    else:
        assert not np.array_equal(unicode_arr, b)
    assert isinstance(b[0], binary_type)
    assert b[0] == b'foo'

    # legacy codec
    codec = LegacyMsgPack()
    # Both legacy round trips run with PendingDeprecationWarning silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PendingDeprecationWarning)
        # broken for bytes array, round-trips bytes to unicode
        b = codec.decode(codec.encode(bytes_arr))
        if PY2:
            # PY2 considers b'foo' and u'foo' to be equal
            assert np.array_equal(unicode_arr, b)
        else:
            assert not np.array_equal(bytes_arr, b)
        assert isinstance(b[0], text_type)
        assert b[0] == u'foo'
        # works for unicode array, round-trips unicode to unicode
        b = codec.decode(codec.encode(unicode_arr))
        assert np.array_equal(unicode_arr, b)
        assert isinstance(b[0], text_type)
        assert b[0] == u'foo'
0 commit comments