Skip to content

Commit 7a3a3b9

Browse files
authored
Merge pull request #12 from jakirkham/add_astype
Add AsType codec from Zarr
2 parents 8b6a33a + 2a5c613 commit 7a3a3b9

File tree

3 files changed

+151
-0
lines changed

3 files changed

+151
-0
lines changed

numcodecs/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
except ImportError: # pragma: no cover
5050
pass
5151

52+
from numcodecs.astype import AsType
53+
register_codec(AsType)
54+
5255
from numcodecs.delta import Delta
5356
register_codec(Delta)
5457

numcodecs/astype.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
4+
import numpy as np
5+
6+
from numcodecs.abc import Codec
7+
from numcodecs.compat import buffer_copy, ndarray_from_buffer
8+
9+
10+
11+
class AsType(Codec):
    """Codec that casts array data between two NumPy data types.

    Encoding reads the input through ``ndarray_from_buffer`` as
    `decode_dtype` and casts it to `encode_dtype`. Decoding reads the input
    as `encode_dtype` and casts it back to `decode_dtype`.

    Parameters
    ----------
    encode_dtype : dtype
        Data type of the encoded data. Passed through ``numpy.dtype``.
    decode_dtype : dtype
        Data type of the decoded data. Passed through ``numpy.dtype``.

    Notes
    -----
    Both directions cast with ``ndarray.astype`` and perform no range or
    precision checks. If `encode_dtype` has lower precision than
    `decode_dtype`, encoding can lose or corrupt values, and the data
    written to disk through this filter will not decode to the original.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype=np.int8)
    >>> x
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)
    >>> f = numcodecs.AsType(encode_dtype=x.dtype, decode_dtype=np.int64)
    >>> y = f.decode(x)
    >>> y
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118])
    >>> z = f.encode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)

    """  # flake8: noqa

    codec_id = 'astype'

    def __init__(self, encode_dtype, decode_dtype):
        # Normalise whatever the caller passed (dtype object, string,
        # scalar type) into numpy.dtype instances.
        self.encode_dtype = np.dtype(encode_dtype)
        self.decode_dtype = np.dtype(decode_dtype)

    def encode(self, buf):
        """Return `buf`, read as `decode_dtype`, cast to `encode_dtype`."""
        decoded = ndarray_from_buffer(buf, self.decode_dtype)
        # astype with default arguments yields a newly allocated array.
        return decoded.astype(self.encode_dtype)

    def decode(self, buf, out=None):
        """Return `buf`, read as `encode_dtype`, cast to `decode_dtype`.

        The converted array is handed to ``buffer_copy`` together with
        `out`, and whatever that helper returns is the result.
        """
        encoded = ndarray_from_buffer(buf, self.encode_dtype)
        decoded = encoded.astype(self.decode_dtype)
        return buffer_copy(decoded, out)

    def get_config(self):
        """Return the codec configuration as a plain dict.

        Dtypes are stored using their ``dtype.str`` form (e.g. ``'<i4'``).
        """
        return {
            'id': self.codec_id,
            'encode_dtype': self.encode_dtype.str,
            'decode_dtype': self.decode_dtype.str,
        }

    def __repr__(self):
        template = '%s(encode_dtype=%r, decode_dtype=%r)'
        fields = (
            type(self).__name__,
            self.encode_dtype.str,
            self.decode_dtype.str,
        )
        return template % fields

numcodecs/tests/test_astype.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
4+
5+
import numpy as np
6+
from numpy.testing import assert_array_equal
7+
from nose.tools import eq_ as eq
8+
9+
10+
from numcodecs.astype import AsType
11+
from numcodecs.tests.common import check_encode_decode, check_config, \
12+
check_repr
13+
14+
15+
# Sample inputs for the codec tests, covering:
#   - dtypes: signed/unsigned integers and floats
#   - shapes: 1D, 2D and 3D
#   - memory orders: C and Fortran
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8').reshape((100, 10)),
    np.random.normal(loc=1000, scale=1, size=(10, 10, 10)),
    np.random.randint(0, 200, size=1000, dtype='u2').reshape(
        (100, 10), order='F'
    ),
]
25+
26+
27+
def test_encode_decode():
    """Run the shared encode/decode check on every sample array.

    Each codec uses the array's own dtype for both directions, so no
    precision is lost by the cast.
    """
    for sample in arrays:
        same_dtype = sample.dtype
        codec = AsType(encode_dtype=same_dtype, decode_dtype=same_dtype)
        check_encode_decode(sample, codec)
31+
32+
33+
def test_decode():
    """Decoding '<i4' input must give equal values with dtype '<i8'."""
    stored_dtype = '<i4'
    restored_dtype = '<i8'
    codec = AsType(encode_dtype=stored_dtype, decode_dtype=restored_dtype)

    encoded = np.arange(10, 20, 1, dtype=stored_dtype)
    wanted = encoded.astype(restored_dtype)
    got = codec.decode(encoded)

    # Values must match and the result must carry the decode dtype.
    assert_array_equal(wanted, got)
    eq(np.dtype(restored_dtype), got.dtype)
41+
42+
43+
def test_encode():
    """Encoding '<i8' input must give equal values with dtype '<i4'."""
    stored_dtype = '<i4'
    restored_dtype = '<i8'
    codec = AsType(encode_dtype=stored_dtype, decode_dtype=restored_dtype)

    decoded = np.arange(10, 20, 1, dtype=restored_dtype)
    wanted = decoded.astype(stored_dtype)
    got = codec.encode(decoded)

    # Values must match and the result must carry the encode dtype.
    assert_array_equal(wanted, got)
    eq(np.dtype(stored_dtype), got.dtype)
51+
52+
53+
def test_config():
    """Pass an '<i4'/'<i8' AsType codec to the shared config check."""
    dtypes = {'encode_dtype': '<i4', 'decode_dtype': '<i8'}
    check_config(AsType(**dtypes))
57+
58+
59+
def test_repr():
    """Pass a sample AsType repr string to the shared repr check."""
    sample_repr = "AsType(encode_dtype='<i4', decode_dtype='<i2')"
    check_repr(sample_repr)

0 commit comments

Comments
 (0)