Skip to content

Commit 0ffd3b2

Browse files
committed
introducing Int16HashTable and UInt16HashTable
1 parent 8975f06 commit 0ffd3b2

File tree

7 files changed

+71
-5
lines changed

7 files changed

+71
-5
lines changed

pandas/_libs/hashtable.pxd

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,20 @@ from numpy cimport intp_t, ndarray
33
from pandas._libs.khash cimport (
44
float32_t,
55
float64_t,
6+
int16_t,
67
int32_t,
78
int64_t,
89
kh_float32_t,
910
kh_float64_t,
11+
kh_int16_t,
1012
kh_int32_t,
1113
kh_int64_t,
1214
kh_pymap_t,
1315
kh_str_t,
16+
kh_uint16_t,
1417
kh_uint32_t,
1518
kh_uint64_t,
19+
uint16_t,
1620
uint32_t,
1721
uint64_t,
1822
)
@@ -46,6 +50,18 @@ cdef class Int32HashTable(HashTable):
4650
cpdef get_item(self, int32_t val)
4751
cpdef set_item(self, int32_t key, Py_ssize_t val)
4852

53+
# Declaration of the HashTable subclass backed by a kh_uint16_t table.
# set_item stores a Py_ssize_t value under a uint16_t key; get_item takes a
# uint16_t argument (named `val`, presumably the key to look up — confirm
# against the implementation).
cdef class UInt16HashTable(HashTable):
54+
cdef kh_uint16_t *table
55+
56+
cpdef get_item(self, uint16_t val)
57+
cpdef set_item(self, uint16_t key, Py_ssize_t val)
58+
59+
# Declaration of the HashTable subclass backed by a kh_int16_t table.
# set_item stores a Py_ssize_t value under an int16_t key; get_item takes an
# int16_t argument (named `val`, presumably the key to look up — confirm
# against the implementation).
cdef class Int16HashTable(HashTable):
60+
cdef kh_int16_t *table
61+
62+
cpdef get_item(self, int16_t val)
63+
cpdef set_item(self, int16_t key, Py_ssize_t val)
64+
4965
cdef class Float64HashTable(HashTable):
5066
cdef kh_float64_t *table
5167

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
1010
# name
1111
cimported_types = ['float32',
1212
'float64',
13+
'int16',
1314
'int32',
1415
'int64',
1516
'pymap',
1617
'str',
1718
'strbox',
19+
'uint16',
1820
'uint32',
1921
'uint64']
2022
}}
@@ -48,9 +50,11 @@ dtypes = [('Float64', 'float64', 'float64_t'),
4850
('Float32', 'float32', 'float32_t'),
4951
('Int64', 'int64', 'int64_t'),
5052
('Int32', 'int32', 'int32_t'),
53+
('Int16', 'int16', 'int16_t'),
5154
('String', 'string', 'char *'),
5255
('UInt64', 'uint64', 'uint64_t'),
53-
('UInt32', 'uint32', 'uint32_t')]
56+
('UInt32', 'uint32', 'uint32_t'),
57+
('UInt16', 'uint16', 'uint16_t')]
5458
}}
5559

5660
{{for name, dtype, c_type in dtypes}}
@@ -78,8 +82,10 @@ cdef inline void append_data_{{dtype}}({{name}}VectorData *data,
7882
ctypedef fused vector_data:
7983
Int64VectorData
8084
Int32VectorData
85+
Int16VectorData
8186
UInt64VectorData
8287
UInt32VectorData
88+
UInt16VectorData
8389
Float64VectorData
8490
Float32VectorData
8591
StringVectorData
@@ -99,7 +105,9 @@ dtypes = [('Float64', 'float64', 'float64_t'),
99105
('Int64', 'int64', 'int64_t'),
100106
('Float32', 'float32', 'float32_t'),
101107
('UInt32', 'uint32', 'uint32_t'),
102-
('Int32', 'int32', 'int32_t')]
108+
('Int32', 'int32', 'int32_t'),
109+
('UInt16', 'uint16', 'uint16_t'),
110+
('Int16', 'int16', 'int16_t')]
103111

104112
}}
105113

@@ -293,7 +301,9 @@ dtypes = [('Float64', 'float64', True, 'np.nan'),
293301
('Int64', 'int64', False, 'NPY_NAT'),
294302
('Float32', 'float32', True, 'np.nan'),
295303
('UInt32', 'uint32', False, 0),
296-
('Int32', 'int32', False, 0)]
304+
('Int32', 'int32', False, 0),
305+
('UInt16', 'uint16', False, 0),
306+
('Int16', 'int16', False, 0)]
297307

298308
}}
299309

pandas/_libs/hashtable_func_helper.pxi.in

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@ dtypes = [('float64', 'float64', 'float64_t'),
1111
('float32', 'float32', 'float32_t'),
1212
('uint64', 'uint64', 'uint64_t'),
1313
('uint32', 'uint32', 'uint32_t'),
14+
('uint16', 'uint16', 'uint16_t'),
1415
('object', 'pymap', 'object'),
1516
('int64', 'int64', 'int64_t'),
16-
('int32', 'int32', 'int32_t')]
17+
('int32', 'int32', 'int32_t'),
18+
('int16', 'int16', 'int16_t')]
1719

1820
}}
1921

@@ -281,8 +283,10 @@ dtypes = [('float64', 'float64_t', 'float64', 'float64'),
281283
('float32', 'float32_t', 'float32', 'float32'),
282284
('int64', 'int64_t', 'int64', 'int64'),
283285
('int32', 'int32_t', 'int32', 'int32'),
286+
('int16', 'int16_t', 'int16', 'int16'),
284287
('uint64', 'uint64_t', 'uint64', 'uint64'),
285288
('uint32', 'uint32_t', 'uint32', 'uint32'),
289+
('uint16', 'uint16_t', 'uint16', 'uint16'),
286290
('object', 'object', 'pymap', 'object_')]
287291
}}
288292

pandas/_libs/khash.pxd

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
from cpython.object cimport PyObject
2-
from numpy cimport float32_t, float64_t, int32_t, int64_t, uint32_t, uint64_t
2+
from numpy cimport (
3+
float32_t,
4+
float64_t,
5+
int16_t,
6+
int32_t,
7+
int64_t,
8+
uint16_t,
9+
uint32_t,
10+
uint64_t,
11+
)
312

413

514
cdef extern from "khash_python.h":

pandas/_libs/khash_for_primitive_helper.pxi.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ primitive_types = [('int64', 'int64_t'),
1313
('int32', 'int32_t'),
1414
('uint32', 'uint32_t'),
1515
('float32', 'float32_t'),
16+
('int16', 'int16_t'),
17+
('uint16', 'uint16_t'),
1618
]
1719
}}
1820

pandas/_libs/src/klib/khash.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ typedef unsigned long long khuint64_t;
129129
typedef signed long long khint64_t;
130130
#endif
131131

132+
/* 16-bit key type: picks whichever of unsigned int / unsigned short has a
   maximum value of 0xffff.
   NOTE(review): khint16_t is unsigned in both branches and is the key type
   of both KHASH_MAP_INIT_INT16 and KHASH_MAP_INIT_UINT16, whereas the 64-bit
   typedefs distinguish khuint64_t from khint64_t — confirm that sharing one
   unsigned type for the signed table is intended.
   NOTE(review): there is no #else branch, so khint16_t stays undefined when
   neither condition holds. */
#if UINT_MAX == 0xffffu
133+
typedef unsigned int khint16_t;
134+
#elif USHRT_MAX == 0xffffu
135+
typedef unsigned short khint16_t;
136+
#endif
137+
132138
typedef double khfloat64_t;
133139
/* NOTE(review): khfloat32_t is an alias for double here, identical to
   khfloat64_t — confirm whether float was intended. */
typedef double khfloat32_t;
134140

@@ -616,6 +622,17 @@ PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key)
616622
#define KHASH_MAP_INIT_INT64(name, khval_t) \
617623
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
618624

625+
/*! @function
626+
@abstract Instantiate a hash map with 16-bit integer keys (used for the int16 table)
627+
@param name Name of the hash table [symbol]
628+
@param khval_t Type of values [type]
629+
@discussion Keys are stored as khint16_t and hashed/compared with
kh_int_hash_func / kh_int_hash_equal.
*/
630+
#define KHASH_MAP_INIT_INT16(name, khval_t) \
631+
KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
632+
633+
/*! @function
  @abstract     Instantiate a hash map with 16-bit integer keys (used for the uint16 table)
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
  @discussion   Expands to the same KHASH_INIT arguments as
                KHASH_MAP_INIT_INT16: khint16_t keys with
                kh_int_hash_func / kh_int_hash_equal.
 */
#define KHASH_MAP_INIT_UINT16(name, khval_t) \
634+
KHASH_INIT(name, khint16_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
635+
619636

620637
typedef const char *kh_cstr_t;
621638
/*! @function
@@ -641,12 +658,16 @@ typedef const char *kh_cstr_t;
641658
#define kh_exist_float32(h, k) (kh_exist(h, k))
642659
#define kh_exist_int32(h, k) (kh_exist(h, k))
643660
#define kh_exist_uint32(h, k) (kh_exist(h, k))
661+
/* 16-bit variants: forward their arguments unchanged to kh_exist. */
#define kh_exist_int16(h, k) (kh_exist(h, k))
662+
#define kh_exist_uint16(h, k) (kh_exist(h, k))
644663

645664
KHASH_MAP_INIT_STR(str, size_t)
646665
KHASH_MAP_INIT_INT(int32, size_t)
647666
KHASH_MAP_INIT_UINT(uint32, size_t)
648667
KHASH_MAP_INIT_INT64(int64, size_t)
649668
KHASH_MAP_INIT_UINT64(uint64, size_t)
669+
/* Instantiate the 16-bit maps: tables named int16 and uint16, both with
   size_t values. */
KHASH_MAP_INIT_INT16(int16, size_t)
670+
KHASH_MAP_INIT_UINT16(uint16, size_t)
650671

651672

652673
#endif /* __AC_KHASH_H */

pandas/tests/libs/test_hashtable.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
(ht.Int32HashTable, np.int32),
1616
(ht.UInt32HashTable, np.uint32),
1717
(ht.Float32HashTable, np.float32),
18+
(ht.Int16HashTable, np.int16),
19+
(ht.UInt16HashTable, np.uint16),
1820
],
1921
)
2022
class TestHashTable:
@@ -153,6 +155,8 @@ def get_ht_function(fun_name, type_suffix):
153155
(np.int32, "int32"),
154156
(np.uint32, "uint32"),
155157
(np.float32, "float32"),
158+
(np.int16, "int16"),
159+
(np.uint16, "uint16"),
156160
],
157161
)
158162
class TestHelpFunctions:

0 commit comments

Comments
 (0)