Skip to content

Commit c952d68

Browse files
committed
introducing UInt8HashTable and Int8HashTable
1 parent 0ffd3b2 commit c952d68

File tree

7 files changed

+74
-7
lines changed

7 files changed

+74
-7
lines changed

pandas/_libs/hashtable.pxd

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,23 @@ from numpy cimport intp_t, ndarray
33
from pandas._libs.khash cimport (
44
float32_t,
55
float64_t,
6+
int8_t,
67
int16_t,
78
int32_t,
89
int64_t,
910
kh_float32_t,
1011
kh_float64_t,
12+
kh_int8_t,
1113
kh_int16_t,
1214
kh_int32_t,
1315
kh_int64_t,
1416
kh_pymap_t,
1517
kh_str_t,
18+
kh_uint8_t,
1619
kh_uint16_t,
1720
kh_uint32_t,
1821
kh_uint64_t,
22+
uint8_t,
1923
uint16_t,
2024
uint32_t,
2125
uint64_t,
@@ -62,6 +66,18 @@ cdef class Int16HashTable(HashTable):
6266
cpdef get_item(self, int16_t val)
6367
cpdef set_item(self, int16_t key, Py_ssize_t val)
6468

69+
cdef class UInt8HashTable(HashTable):
70+
cdef kh_uint8_t *table
71+
72+
cpdef get_item(self, uint8_t val)
73+
cpdef set_item(self, uint8_t key, Py_ssize_t val)
74+
75+
cdef class Int8HashTable(HashTable):
76+
cdef kh_int8_t *table
77+
78+
cpdef get_item(self, int8_t val)
79+
cpdef set_item(self, int8_t key, Py_ssize_t val)
80+
6581
cdef class Float64HashTable(HashTable):
6682
cdef kh_float64_t *table
6783

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
1010
# name
1111
cimported_types = ['float32',
1212
'float64',
13+
'int8',
1314
'int16',
1415
'int32',
1516
'int64',
1617
'pymap',
1718
'str',
1819
'strbox',
20+
'uint8',
1921
'uint16',
2022
'uint32',
2123
'uint64']
@@ -51,10 +53,12 @@ dtypes = [('Float64', 'float64', 'float64_t'),
5153
('Int64', 'int64', 'int64_t'),
5254
('Int32', 'int32', 'int32_t'),
5355
('Int16', 'int16', 'int16_t'),
56+
('Int8', 'int8', 'int8_t'),
5457
('String', 'string', 'char *'),
5558
('UInt64', 'uint64', 'uint64_t'),
5659
('UInt32', 'uint32', 'uint32_t'),
57-
('UInt16', 'uint16', 'uint16_t')]
60+
('UInt16', 'uint16', 'uint16_t'),
61+
('UInt8', 'uint8', 'uint8_t')]
5862
}}
5963

6064
{{for name, dtype, c_type in dtypes}}
@@ -83,9 +87,11 @@ ctypedef fused vector_data:
8387
Int64VectorData
8488
Int32VectorData
8589
Int16VectorData
90+
Int8VectorData
8691
UInt64VectorData
8792
UInt32VectorData
8893
UInt16VectorData
94+
UInt8VectorData
8995
Float64VectorData
9096
Float32VectorData
9197
StringVectorData
@@ -107,7 +113,9 @@ dtypes = [('Float64', 'float64', 'float64_t'),
107113
('UInt32', 'uint32', 'uint32_t'),
108114
('Int32', 'int32', 'int32_t'),
109115
('UInt16', 'uint16', 'uint16_t'),
110-
('Int16', 'int16', 'int16_t')]
116+
('Int16', 'int16', 'int16_t'),
117+
('UInt8', 'uint8', 'uint8_t'),
118+
('Int8', 'int8', 'int8_t')]
111119

112120
}}
113121

@@ -303,7 +311,9 @@ dtypes = [('Float64', 'float64', True, 'np.nan'),
303311
('UInt32', 'uint32', False, 0),
304312
('Int32', 'int32', False, 0),
305313
('UInt16', 'uint16', False, 0),
306-
('Int16', 'int16', False, 0)]
314+
('Int16', 'int16', False, 0),
315+
('UInt8', 'uint8', False, 0),
316+
('Int8', 'int8', False, 0)]
307317

308318
}}
309319

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ dtypes = [('float64', 'float64', 'float64_t'),
1212
('uint64', 'uint64', 'uint64_t'),
1313
('uint32', 'uint32', 'uint32_t'),
1414
('uint16', 'uint16', 'uint16_t'),
15+
('uint8', 'uint8', 'uint8_t'),
1516
('object', 'pymap', 'object'),
1617
('int64', 'int64', 'int64_t'),
1718
('int32', 'int32', 'int32_t'),
18-
('int16', 'int16', 'int16_t')]
19+
('int16', 'int16', 'int16_t'),
20+
('int8', 'int8', 'int8_t')]
1921

2022
}}
2123

@@ -284,9 +286,11 @@ dtypes = [('float64', 'float64_t', 'float64', 'float64'),
284286
('int64', 'int64_t', 'int64', 'int64'),
285287
('int32', 'int32_t', 'int32', 'int32'),
286288
('int16', 'int16_t', 'int16', 'int16'),
289+
('int8', 'int8_t', 'int8', 'int8'),
287290
('uint64', 'uint64_t', 'uint64', 'uint64'),
288291
('uint32', 'uint32_t', 'uint32', 'uint32'),
289292
('uint16', 'uint16_t', 'uint16', 'uint16'),
293+
('uint8', 'uint8_t', 'uint8', 'uint8'),
290294
('object', 'object', 'pymap', 'object_')]
291295
}}
292296

pandas/_libs/khash.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ from cpython.object cimport PyObject
22
from numpy cimport (
33
float32_t,
44
float64_t,
5+
int8_t,
56
int16_t,
67
int32_t,
78
int64_t,
9+
uint8_t,
810
uint16_t,
911
uint32_t,
1012
uint64_t,

pandas/_libs/khash_for_primitive_helper.pxi.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ primitive_types = [('int64', 'int64_t'),
1515
('float32', 'float32_t'),
1616
('int16', 'int16_t'),
1717
('uint16', 'uint16_t'),
18+
('int8', 'int8_t'),
19+
('uint8', 'uint8_t'),
1820
]
1921
}}
2022

pandas/_libs/src/klib/khash.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ typedef unsigned int khint16_t;
135135
typedef unsigned short khint16_t;
136136
#endif
137137

138+
#if UCHAR_MAX == 0xffu
139+
typedef unsigned char khint8_t;
140+
#endif
141+
138142
typedef double khfloat64_t;
139143
typedef float khfloat32_t;  /* 32-bit float keys are stored as float, not widened to double */
140144

@@ -633,6 +637,18 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
633637
#define KHASH_MAP_INIT_UINT16(name, khval_t) \
634638
KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
635639

640+
/*! @function
641+
@abstract Instantiate a hash map containing 8-bit integer keys
642+
@param name Name of the hash table [symbol]
643+
@param khval_t Type of values [type]
644+
*/
645+
#define KHASH_MAP_INIT_INT8(name, khval_t) \
646+
KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
647+
648+
#define KHASH_MAP_INIT_UINT8(name, khval_t) \
649+
KHASH_INIT(name, khint8_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
650+
651+
636652

637653
typedef const char *kh_cstr_t;
638654
/*! @function
@@ -660,6 +676,8 @@ typedef const char *kh_cstr_t;
660676
#define kh_exist_uint32(h, k) (kh_exist(h, k))
661677
#define kh_exist_int16(h, k) (kh_exist(h, k))
662678
#define kh_exist_uint16(h, k) (kh_exist(h, k))
679+
#define kh_exist_int8(h, k) (kh_exist(h, k))
680+
#define kh_exist_uint8(h, k) (kh_exist(h, k))
663681

664682
KHASH_MAP_INIT_STR(str, size_t)
665683
KHASH_MAP_INIT_INT(int32, size_t)
@@ -668,6 +686,8 @@ KHASH_MAP_INIT_INT64(int64, size_t)
668686
KHASH_MAP_INIT_UINT64(uint64, size_t)
669687
KHASH_MAP_INIT_INT16(int16, size_t)
670688
KHASH_MAP_INIT_UINT16(uint16, size_t)
689+
KHASH_MAP_INIT_INT8(int8, size_t)  /* keys stored as khint8_t */
690+
KHASH_MAP_INIT_UINT8(uint8, size_t)  /* keys stored as khint8_t */
671691

672692

673693
#endif /* __AC_KHASH_H */

pandas/tests/libs/test_hashtable.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
(ht.Float32HashTable, np.float32),
1818
(ht.Int16HashTable, np.int16),
1919
(ht.UInt16HashTable, np.uint16),
20+
(ht.Int8HashTable, np.int8),
21+
(ht.UInt8HashTable, np.uint8),
2022
],
2123
)
2224
class TestHashTable:
@@ -77,7 +79,10 @@ def test_lookup(self, table_type, dtype):
7779
tm.assert_numpy_array_equal(result, expected)
7880

7981
def test_lookup_wrong(self, table_type, dtype):
80-
N = 512
82+
if dtype in (np.int8, np.uint8):
83+
N = 100
84+
else:
85+
N = 512
8186
table = table_type()
8287
keys = (np.arange(N) + N).astype(dtype)
8388
table.map_locations(keys)
@@ -86,7 +91,10 @@ def test_lookup_wrong(self, table_type, dtype):
8691
assert np.all(result == -1)
8792

8893
def test_unique(self, table_type, dtype):
89-
N = 1000
94+
if dtype in (np.int8, np.uint8):
95+
N = 88
96+
else:
97+
N = 1000
9098
table = table_type()
9199
expected = (np.arange(N) + N).astype(dtype)
92100
keys = np.repeat(expected, 5)
@@ -157,6 +165,8 @@ def get_ht_function(fun_name, type_suffix):
157165
(np.float32, "float32"),
158166
(np.int16, "int16"),
159167
(np.uint16, "uint16"),
168+
(np.int8, "int8"),
169+
(np.uint8, "uint8"),
160170
],
161171
)
162172
class TestHelpFunctions:
@@ -197,7 +207,10 @@ def test_ismember_no(self, dtype, type_suffix):
197207
tm.assert_numpy_array_equal(result, expected)
198208

199209
def test_mode(self, dtype, type_suffix):
200-
N = 11111
210+
if dtype in (np.int8, np.uint8):
211+
N = 53
212+
else:
213+
N = 11111
201214
mode = get_ht_function("mode", type_suffix)
202215
values = np.repeat(np.arange(N).astype(dtype), 5)
203216
values[0] = 42

0 commit comments

Comments
 (0)