@@ -104,6 +104,21 @@ cdef _take_2d_int64(ndarray[int64_t, ndim=2] values,
104
104
result[i, j] = values[i, indexer[i, j]]
105
105
return result
106
106
107
cdef _take_2d_uint64(ndarray[uint64_t, ndim=2] values,
                     object idx):
    # Row-wise "take" for a 2-D uint64 array.
    #
    # For every position (row, col) the output holds
    # values[row, idx[row, col]], i.e. each row of `values` is reordered
    # by the matching row of `idx`. The output has the same shape and
    # dtype as `values` (it is allocated with np.empty_like).
    #
    # `idx` is bound to a Py_ssize_t buffer with cast=True, so it is
    # presumably an integer index array such as the result of argsort
    # along axis 1 -- TODO confirm against callers. No bounds checking is
    # done here beyond whatever the module's compiler directives enable.
    cdef:
        Py_ssize_t row, col, nrows, ncols
        ndarray[Py_ssize_t, ndim=2, cast=True] positions = idx
        ndarray[uint64_t, ndim=2] out

    nrows, ncols = (<object> values).shape
    out = np.empty_like(values)

    for row in range(nrows):
        for col in range(ncols):
            out[row, col] = values[row, positions[row, col]]

    return out
121
+
107
122
cdef _take_2d_object(ndarray[object , ndim= 2 ] values,
108
123
object idx):
109
124
cdef:
@@ -286,6 +301,83 @@ def rank_1d_int64(object in_arr, ties_method='average', ascending=True,
286
301
return ranks
287
302
288
303
304
def rank_1d_uint64(object in_arr, ties_method='average', ascending=True,
                   na_option='keep', pct=False):
    """
    Fast version of scipy.stats.rankdata for 1-D uint64 data.

    Parameters
    ----------
    in_arr : array-like
        Data to rank. It is passed through ``np.asarray`` and bound to a
        1-D ``uint64`` buffer, so other dtypes are expected to be rejected
        at that assignment.
    ties_method : key of the module-level ``tiebreakers`` mapping
        Selects how tied values are ranked (default ``'average'``).
    ascending : bool, default True
        If False, the largest value receives rank 1.
    na_option : str, default 'keep'
        Accepted for signature parity with the other ``rank_1d_*``
        functions. uint64 cannot represent a missing value, so this
        argument has no effect here.
    pct : bool, default False
        If True, return the ranks divided by the number of elements.

    Returns
    -------
    ndarray of float64 with the same length as the input.
    """

    cdef:
        Py_ssize_t i, j, n, dups = 0, total_tie_count = 0
        ndarray[uint64_t] sorted_data, values
        ndarray[float64_t] ranks
        ndarray[int64_t] argsorted
        # Same width and signedness as the data, so values >= 2**63 are
        # not wrapped when copied out of the buffer.
        uint64_t val
        float64_t sum_ranks = 0
        int tiebreak = 0
        # Double precision: a C ``float`` stops incrementing at 2**24,
        # which would corrupt ``pct`` output on large inputs.
        float64_t count = 0.0

    tiebreak = tiebreakers[ties_method]

    # uint64 has no NaN value, so we just
    # create the "values" array and proceed.
    values = np.asarray(in_arr)

    n = len(values)
    ranks = np.empty(n, dtype='f8')

    if tiebreak == TIEBREAK_FIRST:
        # need to use a stable sort here
        _as = values.argsort(kind='mergesort')
        if not ascending:
            tiebreak = TIEBREAK_FIRST_DESCENDING
    else:
        _as = values.argsort()

    if not ascending:
        _as = _as[::-1]

    sorted_data = values.take(_as)
    argsorted = _as.astype('i8')

    for i in range(n):
        # sum_ranks / dups accumulate over the current run of equal values
        sum_ranks += i + 1
        dups += 1
        val = sorted_data[i]
        # No missing-value check: there is no uint64 sentinel to compare
        # against, so every element is ranked.
        count += 1.0
        # Exact integer comparison marks the end of a run of ties.
        if i == n - 1 or sorted_data[i + 1] != val:
            if tiebreak == TIEBREAK_AVERAGE:
                for j in range(i - dups + 1, i + 1):
                    ranks[argsorted[j]] = sum_ranks / dups
            elif tiebreak == TIEBREAK_MIN:
                for j in range(i - dups + 1, i + 1):
                    ranks[argsorted[j]] = i - dups + 2
            elif tiebreak == TIEBREAK_MAX:
                for j in range(i - dups + 1, i + 1):
                    ranks[argsorted[j]] = i + 1
            elif tiebreak == TIEBREAK_FIRST:
                for j in range(i - dups + 1, i + 1):
                    ranks[argsorted[j]] = j + 1
            elif tiebreak == TIEBREAK_FIRST_DESCENDING:
                for j in range(i - dups + 1, i + 1):
                    ranks[argsorted[j]] = 2 * i - j - dups + 2
            elif tiebreak == TIEBREAK_DENSE:
                total_tie_count += 1
                for j in range(i - dups + 1, i + 1):
                    ranks[argsorted[j]] = total_tie_count
            # start a fresh run
            sum_ranks = dups = 0
    if pct:
        return ranks / count
    else:
        return ranks
379
+
380
+
289
381
def rank_2d_float64 (object in_arr , axis = 0 , ties_method = ' average' ,
290
382
ascending = True , na_option = ' keep' , pct = False ):
291
383
"""
@@ -472,6 +564,93 @@ def rank_2d_int64(object in_arr, axis=0, ties_method='average',
472
564
return ranks
473
565
474
566
567
def rank_2d_uint64(object in_arr, axis=0, ties_method='average',
                   ascending=True, na_option='keep', pct=False):
    """
    Fast version of scipy.stats.rankdata for 2-D uint64 data.

    Parameters
    ----------
    in_arr : array-like
        Data to rank. It is passed through ``np.asarray`` and a copy is
        bound to a 2-D ``uint64`` buffer.
    axis : int, default 0
        Axis along which to rank. For ``axis == 0`` the input is
        transposed before ranking and the result is transposed back; any
        other value ranks along each row.
    ties_method : key of the module-level ``tiebreakers`` mapping
        Selects how tied values are ranked (default ``'average'``).
    ascending : bool, default True
        If False, the largest value in each lane receives rank 1.
    na_option : str, default 'keep'
        Accepted for signature parity with the other ``rank_2d_*``
        functions. uint64 cannot represent a missing value, so this
        argument has no effect here.
    pct : bool, default False
        If True, each ranked lane is divided by its number of elements.

    Returns
    -------
    ndarray of float64 with the same shape as the input.
    """

    cdef:
        Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0
        ndarray[float64_t, ndim=2] ranks
        ndarray[int64_t, ndim=2] argsorted
        ndarray[uint64_t, ndim=2, cast=True] values
        # Same width and signedness as the data, so values >= 2**63 are
        # not wrapped when copied out of the buffer.
        uint64_t val
        float64_t sum_ranks = 0
        int tiebreak = 0
        # Double precision: a C ``float`` stops incrementing at 2**24,
        # which would corrupt ``pct`` output on long lanes.
        float64_t count = 0.0

    tiebreak = tiebreakers[ties_method]

    in_arr = np.asarray(in_arr)

    # uint64 has no NaN value, so we just
    # create the "values" array and proceed.
    if axis == 0:
        values = in_arr.T.copy()
    else:
        values = in_arr.copy()

    n, k = (<object> values).shape
    ranks = np.empty((n, k), dtype='f8')

    if tiebreak == TIEBREAK_FIRST:
        # need to use a stable sort here
        _as = values.argsort(axis=1, kind='mergesort')
        if not ascending:
            tiebreak = TIEBREAK_FIRST_DESCENDING
    else:
        _as = values.argsort(1)

    if not ascending:
        _as = _as[:, ::-1]

    # From here on each row of `values` is in sorted order.
    values = _take_2d_uint64(values, _as)
    argsorted = _as.astype('i8')

    for i in range(n):
        # per-row state
        dups = sum_ranks = 0
        total_tie_count = 0
        count = 0.0
        for j in range(k):
            # sum_ranks / dups accumulate over the current run of ties
            sum_ranks += j + 1
            dups += 1
            val = values[i, j]
            # No missing-value check: there is no uint64 sentinel to
            # compare against, so every element is ranked.
            count += 1.0
            # Exact integer comparison marks the end of a run of ties;
            # a floating-point tolerance is meaningless for integers.
            if j == k - 1 or values[i, j + 1] != val:
                if tiebreak == TIEBREAK_AVERAGE:
                    for z in range(j - dups + 1, j + 1):
                        ranks[i, argsorted[i, z]] = sum_ranks / dups
                elif tiebreak == TIEBREAK_MIN:
                    for z in range(j - dups + 1, j + 1):
                        ranks[i, argsorted[i, z]] = j - dups + 2
                elif tiebreak == TIEBREAK_MAX:
                    for z in range(j - dups + 1, j + 1):
                        ranks[i, argsorted[i, z]] = j + 1
                elif tiebreak == TIEBREAK_FIRST:
                    for z in range(j - dups + 1, j + 1):
                        ranks[i, argsorted[i, z]] = z + 1
                elif tiebreak == TIEBREAK_FIRST_DESCENDING:
                    for z in range(j - dups + 1, j + 1):
                        ranks[i, argsorted[i, z]] = 2 * j - z - dups + 2
                elif tiebreak == TIEBREAK_DENSE:
                    total_tie_count += 1
                    for z in range(j - dups + 1, j + 1):
                        ranks[i, argsorted[i, z]] = total_tie_count
                # start a fresh run
                sum_ranks = dups = 0
        if pct:
            ranks[i, :] /= count
    if axis == 0:
        return ranks.T
    else:
        return ranks
652
+
653
+
475
654
def rank_1d_generic (object in_arr , bint retry = 1 , ties_method = ' average' ,
476
655
ascending = True , na_option = ' keep' , pct = False ):
477
656
"""
0 commit comments