Skip to content

Commit 44de64f

Browse files
committed
small fixes to fft.s
1 parent a747b4d commit 44de64f

File tree

1 file changed

+8
-9
lines changed
  • contents/cooley_tukey/code/asm-x64

1 file changed

+8
-9
lines changed

contents/cooley_tukey/code/asm-x64/fft.s

+8-9
Original file line numberDiff line numberDiff line change
@@ -228,8 +228,7 @@ iterative_cooley_tukey:
228228
push r14
229229
push r15
230230
push rbx
231-
push rbp
232-
sub rsp, 40
231+
sub rsp, 48
233232
mov r12, rdi
234233
mov r13, rsi
235234
call bit_reverse # Bit reversing array
@@ -245,7 +244,8 @@ iter_ct_loop_i:
245244
movsd xmm0, two # Calculate stride = 2^(r14)
246245
cvtsi2sdq xmm1, r14
247246
call pow
248-
cvttsd2si rbp, xmm0
247+
cvttsd2si r10, xmm0
248+
mov QWORD PTR [rsp + 40], r10# move stride to stack
249249
movsd xmm1, two_pi # Calculating cexp(-2pi * I / stride)
250250
divsd xmm1, xmm0
251251
pxor xmm0, xmm0
@@ -261,15 +261,15 @@ iter_ct_loop_j:
261261
movsd QWORD PTR [rsp + 24], xmm4
262262
movsd QWORD PTR [rsp + 32], xmm5
263263
xor rbx, rbx
264-
mov rax, rbp # Calculate stride / 2
264+
mov rax, QWORD PTR [rsp + 40]# Calculate stride / 2
265265
sar rax, 1
266266
iter_ct_loop_k:
267267
cmp rbx, rax # Check if rbx is less than stride / 2
268268
je iter_ct_end_k
269269
mov r8, r15 # Saving pointers to X[k + j + stride / 2] and X[k + j]
270270
add r8, rbx
271271
sal r8, 4
272-
mov r9, rbp
272+
mov r9, QWORD PTR [rsp + 40]
273273
sal r9, 3
274274
add r9, r8
275275
lea r9, [r12 + r9]
@@ -301,19 +301,18 @@ iter_ct_loop_k:
301301
movsd QWORD PTR [rsp + 24], xmm0 # Saving answer
302302
movsd QWORD PTR [rsp + 32], xmm1
303303
add rbx, 1
304-
mov rax, rbp
304+
mov rax, QWORD PTR [rsp + 40]
305305
sar rax, 1
306306
jmp iter_ct_loop_k
307307
iter_ct_end_k:
308-
add r15, rbp
308+
add r15, QWORD PTR [rsp + 40]
309309
jmp iter_ct_loop_j
310310
iter_ct_end_j:
311311
add r14, 1
312312
mov rax, QWORD PTR [rsp]
313313
jmp iter_ct_loop_i
314314
iter_ct_end_i:
315-
add rsp, 40
316-
pop rbp
315+
add rsp, 48
317316
pop rbx
318317
pop r15
319318
pop r14

0 commit comments

Comments
 (0)