@@ -4239,17 +4239,57 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
4239
4239
REDUCE(result); \
4240
4240
} while(0)
4241
4241
4242
- if (Py_SIZE (b ) <= FIVEARY_CUTOFF ) {
4242
+ i = Py_SIZE (b );
4243
+ digit bi = i ? b -> ob_digit [i - 1 ] : 0 ;
4244
+ digit bit ;
4245
+ if (i <= 1 && bi <= 3 ) {
4246
+ /* aim for minimal overhead */
4247
+ if (bi >= 2 ) {
4248
+ MULT (a , a , z );
4249
+ if (bi == 3 ) {
4250
+ MULT (z , a , z );
4251
+ }
4252
+ }
4253
+ else if (bi == 1 ) {
4254
+ /* Multiplying by 1 serves two purposes: if `a` is of an int
4255
+ * subclass, makes the result an int (e.g., pow(False, 1) returns
4256
+ * 0 instead of False), and potentially reduces `a` by the modulus.
4257
+ */
4258
+ MULT (a , z , z );
4259
+ }
4260
+ /* else bi is 0, and z==1 is correct */
4261
+ }
4262
+ else if (i <= FIVEARY_CUTOFF ) {
4243
4263
/* Left-to-right binary exponentiation (HAC Algorithm 14.79) */
4244
4264
/* http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf */
4245
- for (i = Py_SIZE (b ) - 1 ; i >= 0 ; -- i ) {
4246
- digit bi = b -> ob_digit [i ];
4247
4265
4248
- for (j = (digit )1 << (PyLong_SHIFT - 1 ); j != 0 ; j >>= 1 ) {
4266
+ /* Find the first significant exponent bit. Search right to left
4267
+ * because we're primarily trying to cut overhead for small powers.
4268
+ */
4269
+ assert (bi ); /* else there is no significant bit */
4270
+ Py_INCREF (a );
4271
+ Py_DECREF (z );
4272
+ z = a ;
4273
+ for (bit = 2 ; ; bit <<= 1 ) {
4274
+ if (bit > bi ) { /* found the first bit */
4275
+ assert ((bi & bit ) == 0 );
4276
+ bit >>= 1 ;
4277
+ assert (bi & bit );
4278
+ break ;
4279
+ }
4280
+ }
4281
+ for (-- i , bit >>= 1 ;;) {
4282
+ for (; bit != 0 ; bit >>= 1 ) {
4249
4283
MULT (z , z , z );
4250
- if (bi & j )
4284
+ if (bi & bit ) {
4251
4285
MULT (z , a , z );
4286
+ }
4287
+ }
4288
+ if (-- i < 0 ) {
4289
+ break ;
4252
4290
}
4291
+ bi = b -> ob_digit [i ];
4292
+ bit = (digit )1 << (PyLong_SHIFT - 1 );
4253
4293
}
4254
4294
}
4255
4295
else {
0 commit comments