@@ -581,18 +581,18 @@ static const uint3 init_fastdiv_values(uint32_t d) {
581
581
return make_uint3 (mp, L, d);
582
582
}
583
583
584
- static __device__ __forceinline__ uint32_t fastdiv (uint32_t n, const uint3 div_consts ) {
585
- // expects div_consts to contain <mp, L, divisor> in <x, y, z>
586
- // div_consts .z is unused and optimized away by the compiler.
584
+ static __device__ __forceinline__ uint32_t fastdiv (uint32_t n, const uint3 fastdiv_values ) {
585
+ // expects fastdiv_values to contain <mp, L, divisor> in <x, y, z>
586
+ // fastdiv_values .z is unused and optimized away by the compiler.
587
587
// Compute high 32 bits of n * mp
588
- const uint32_t hi = __umulhi (n, div_consts .x );
588
+ const uint32_t hi = __umulhi (n, fastdiv_values .x );
589
589
// add n, apply bit shift
590
- return (hi + n) >> div_consts .y ;
590
+ return (hi + n) >> fastdiv_values .y ;
591
591
}
592
592
593
- static __device__ __forceinline__ uint32_t fastmodulo (uint32_t n, const uint3 modulo_consts ) {
594
- // expects modulo_consts to contain <mp, L, divisor> in <x, y, z> (see init_fastdiv_values)
595
- return n - fastdiv (n, modulo_consts ) * modulo_consts .z ;
593
+ static __device__ __forceinline__ uint32_t fastmodulo (uint32_t n, const uint3 fastdiv_values ) {
594
+ // expects fastdiv_values to contain <mp, L, divisor> in <x, y, z> (see init_fastdiv_values)
595
+ return n - fastdiv (n, fastdiv_values ) * fastdiv_values .z ;
596
596
}
597
597
598
598
typedef void (*dequantize_kernel_t )(const void * vx, const int64_t ib, const int iqs, float2 & v);
0 commit comments