@@ -228,8 +228,7 @@ iterative_cooley_tukey:
228
228
push r14
229
229
push r15
230
230
push rbx
231
- push rbp
232
- sub rsp , 40
231
+ sub rsp , 48
233
232
mov r12 , rdi
234
233
mov r13 , rsi
235
234
call bit_reverse # Bit reversing array
@@ -245,7 +244,8 @@ iter_ct_loop_i:
245
244
movsd xmm0 , two # Calculate stride = 2 ^( r14 )
246
245
cvtsi2sdq xmm1 , r14
247
246
call pow
248
- cvttsd2si rbp , xmm0
247
+ cvttsd2si r10 , xmm0
248
+ mov QWORD PTR [ rsp + 40 ], r10 # move stride to stack
249
249
movsd xmm1 , two_pi # Calculating cexp( - 2pi * I / stride)
250
250
divsd xmm1 , xmm0
251
251
pxor xmm0 , xmm0
@@ -261,15 +261,15 @@ iter_ct_loop_j:
261
261
movsd QWORD PTR [ rsp + 24 ], xmm4
262
262
movsd QWORD PTR [ rsp + 32 ], xmm5
263
263
xor rbx , rbx
264
- mov rax , rbp # Calculate stride / 2
264
+ mov rax , QWORD PTR [ rsp + 40 ] # Calculate stride / 2
265
265
sar rax , 1
266
266
iter_ct_loop_k:
267
267
cmp rbx , rax # Check if rbx is less than stride / 2
268
268
je iter_ct_end_k
269
269
mov r8 , r15 # Saving pointers to X [ k + j + stride / 2 ] and X [ k + j ]
270
270
add r8 , rbx
271
271
sal r8 , 4
272
- mov r9 , rbp
272
+ mov r9 , QWORD PTR [ rsp + 40 ]
273
273
sal r9 , 3
274
274
add r9 , r8
275
275
lea r9 , [ r12 + r9 ]
@@ -301,19 +301,18 @@ iter_ct_loop_k:
301
301
movsd QWORD PTR [ rsp + 24 ], xmm0 # Saving answer
302
302
movsd QWORD PTR [ rsp + 32 ], xmm1
303
303
add rbx , 1
304
- mov rax , rbp
304
+ mov rax , QWORD PTR [ rsp + 40 ]
305
305
sar rax , 1
306
306
jmp iter_ct_loop_k
307
307
iter_ct_end_k:
308
- add r15 , rbp
308
+ add r15 , QWORD PTR [ rsp + 40 ]
309
309
jmp iter_ct_loop_j
310
310
iter_ct_end_j:
311
311
add r14 , 1
312
312
mov rax , QWORD PTR [ rsp ]
313
313
jmp iter_ct_loop_i
314
314
iter_ct_end_i:
315
- add rsp , 40
316
- pop rbp
315
+ add rsp , 48
317
316
pop rbx
318
317
pop r15
319
318
pop r14
0 commit comments