@@ -26,6 +26,18 @@ from pandas._libs import algos, hashtable as _hash
26
26
from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib
27
27
from pandas._libs.missing import checknull
28
28
29
# Python 2 vs Python 3: bind ``_thread_allocate_lock`` to the first lock
# factory that can be imported, trying in order the Python 2 ``thread``
# module, the Python 3 ``_thread`` module, and finally their
# ``dummy_thread`` / ``_dummy_thread`` stand-ins.
#
# Only ImportError is caught, so that KeyboardInterrupt, SystemExit or a
# genuine failure raised while importing is not silently swallowed.
# If none of the four modules is importable, the last ImportError propagates.
try:
    from thread import allocate_lock as _thread_allocate_lock
except ImportError:
    try:
        from _thread import allocate_lock as _thread_allocate_lock
    except ImportError:
        try:
            from dummy_thread import allocate_lock as _thread_allocate_lock
        except ImportError:
            from _dummy_thread import allocate_lock as _thread_allocate_lock
40
+
29
41
# Cache, as a C int64, the value returned by util.get_nat() at module load
# (presumably the integer NaT sentinel -- confirm against util).
cdef int64_t iNaT = util.get_nat()
30
42
31
43
@@ -72,6 +84,9 @@ cpdef object get_value_box(ndarray arr, object loc):
72
84
# Don't populate hash tables in monotonic indexes larger than this
_SIZE_CUTOFF = 1000000

# Single module-level lock, held by _ensure_mapping_populated while it
# builds an engine's hash table, so that is_unique behaves correctly in
# multi-threaded code; see gh-21150.
_mapping_populated_lock = _thread_allocate_lock()
75
90
76
91
cdef class IndexEngine:
77
92
@@ -258,17 +273,17 @@ cdef class IndexEngine:
258
273
259
274
cdef inline _ensure_mapping_populated(self):
    # Build self.mapping from the index values if it has not been built
    # yet; doing so also answers the pending uniqueness question, so the
    # need_unique_check flag is cleared on the way out.

    # The module-level lock is taken *before* testing is_mapping_populated,
    # so the test and the population happen as one step under the lock.
    with _mapping_populated_lock:
        if not self.is_mapping_populated:
            index_values = self._get_index_values()
            n_values = len(index_values)

            self.mapping = self._make_hash_table(n_values)
            self._call_map_locations(index_values)

            # The table holding as many entries as there are values is
            # taken to mean that every value is distinct.
            if len(self.mapping) == n_values:
                self.unique = 1

    # NOTE(review): placed after the ``with`` block because the diff shows
    # this line as unchanged by the re-indentation -- confirm.
    self.need_unique_check = 0
274
289
0 commit comments