@@ -82,8 +82,8 @@ static void kf_bfly2(
82
82
C_SUB ( Fout2 [0 ] , Fout [0 ] , t );
83
83
C_ADDTO ( Fout [0 ] , t );
84
84
85
- t .r = S_MUL (Fout2 [1 ].r + Fout2 [1 ].i , tw );
86
- t .i = S_MUL (Fout2 [1 ].i - Fout2 [1 ].r , tw );
85
+ t .r = S_MUL (ADD32_ovflw ( Fout2 [1 ].r , Fout2 [1 ].i ) , tw );
86
+ t .i = S_MUL (SUB32_ovflw ( Fout2 [1 ].i , Fout2 [1 ].r ) , tw );
87
87
C_SUB ( Fout2 [1 ] , Fout [1 ] , t );
88
88
C_ADDTO ( Fout [1 ] , t );
89
89
@@ -92,8 +92,8 @@ static void kf_bfly2(
92
92
C_SUB ( Fout2 [2 ] , Fout [2 ] , t );
93
93
C_ADDTO ( Fout [2 ] , t );
94
94
95
- t .r = S_MUL (Fout2 [3 ].i - Fout2 [3 ].r , tw );
96
- t .i = S_MUL (- Fout2 [3 ].i - Fout2 [3 ].r , tw );
95
+ t .r = S_MUL (SUB32_ovflw ( Fout2 [3 ].i , Fout2 [3 ].r ) , tw );
96
+ t .i = S_MUL (NEG32_ovflw ( ADD32_ovflw ( Fout2 [3 ].i , Fout2 [3 ].r )) , tw );
97
97
C_SUB ( Fout2 [3 ] , Fout [3 ] , t );
98
98
C_ADDTO ( Fout [3 ] , t );
99
99
Fout += 8 ;
@@ -126,10 +126,10 @@ static void kf_bfly4(
126
126
C_ADDTO ( * Fout , scratch1 );
127
127
C_SUB ( scratch1 , Fout [1 ] , Fout [3 ] );
128
128
129
- Fout [1 ].r = scratch0 .r + scratch1 .i ;
130
- Fout [1 ].i = scratch0 .i - scratch1 .r ;
131
- Fout [3 ].r = scratch0 .r - scratch1 .i ;
132
- Fout [3 ].i = scratch0 .i + scratch1 .r ;
129
+ Fout [1 ].r = ADD32_ovflw ( scratch0 .r , scratch1 .i ) ;
130
+ Fout [1 ].i = SUB32_ovflw ( scratch0 .i , scratch1 .r ) ;
131
+ Fout [3 ].r = SUB32_ovflw ( scratch0 .r , scratch1 .i ) ;
132
+ Fout [3 ].i = ADD32_ovflw ( scratch0 .i , scratch1 .r ) ;
133
133
Fout += 4 ;
134
134
}
135
135
} else {
@@ -160,10 +160,10 @@ static void kf_bfly4(
160
160
tw3 += fstride * 3 ;
161
161
C_ADDTO ( * Fout , scratch [3 ] );
162
162
163
- Fout [m ].r = scratch [5 ].r + scratch [4 ].i ;
164
- Fout [m ].i = scratch [5 ].i - scratch [4 ].r ;
165
- Fout [m3 ].r = scratch [5 ].r - scratch [4 ].i ;
166
- Fout [m3 ].i = scratch [5 ].i + scratch [4 ].r ;
163
+ Fout [m ].r = ADD32_ovflw ( scratch [5 ].r , scratch [4 ].i ) ;
164
+ Fout [m ].i = SUB32_ovflw ( scratch [5 ].i , scratch [4 ].r ) ;
165
+ Fout [m3 ].r = SUB32_ovflw ( scratch [5 ].r , scratch [4 ].i ) ;
166
+ Fout [m3 ].i = ADD32_ovflw ( scratch [5 ].i , scratch [4 ].r ) ;
167
167
++ Fout ;
168
168
}
169
169
}
@@ -212,18 +212,18 @@ static void kf_bfly3(
212
212
tw1 += fstride ;
213
213
tw2 += fstride * 2 ;
214
214
215
- Fout [m ].r = Fout -> r - HALF_OF (scratch [3 ].r );
216
- Fout [m ].i = Fout -> i - HALF_OF (scratch [3 ].i );
215
+ Fout [m ].r = SUB32_ovflw ( Fout -> r , HALF_OF (scratch [3 ].r ) );
216
+ Fout [m ].i = SUB32_ovflw ( Fout -> i , HALF_OF (scratch [3 ].i ) );
217
217
218
218
C_MULBYSCALAR ( scratch [0 ] , epi3 .i );
219
219
220
220
C_ADDTO (* Fout ,scratch [3 ]);
221
221
222
- Fout [m2 ].r = Fout [m ].r + scratch [0 ].i ;
223
- Fout [m2 ].i = Fout [m ].i - scratch [0 ].r ;
222
+ Fout [m2 ].r = ADD32_ovflw ( Fout [m ].r , scratch [0 ].i ) ;
223
+ Fout [m2 ].i = SUB32_ovflw ( Fout [m ].i , scratch [0 ].r ) ;
224
224
225
- Fout [m ].r -= scratch [0 ].i ;
226
- Fout [m ].i += scratch [0 ].r ;
225
+ Fout [m ].r = SUB32_ovflw ( Fout [ m ]. r , scratch [0 ].i ) ;
226
+ Fout [m ].i = ADD32_ovflw ( Fout [ m ]. i , scratch [0 ].r ) ;
227
227
228
228
++ Fout ;
229
229
} while (-- k );
@@ -282,22 +282,22 @@ static void kf_bfly5(
282
282
C_ADD ( scratch [8 ],scratch [2 ],scratch [3 ]);
283
283
C_SUB ( scratch [9 ],scratch [2 ],scratch [3 ]);
284
284
285
- Fout0 -> r += scratch [7 ].r + scratch [8 ].r ;
286
- Fout0 -> i += scratch [7 ].i + scratch [8 ].i ;
285
+ Fout0 -> r = ADD32_ovflw ( Fout0 -> r , ADD32_ovflw ( scratch [7 ].r , scratch [8 ].r )) ;
286
+ Fout0 -> i = ADD32_ovflw ( Fout0 -> i , ADD32_ovflw ( scratch [7 ].i , scratch [8 ].i )) ;
287
287
288
- scratch [5 ].r = scratch [0 ].r + S_MUL (scratch [7 ].r ,ya .r ) + S_MUL (scratch [8 ].r ,yb .r );
289
- scratch [5 ].i = scratch [0 ].i + S_MUL (scratch [7 ].i ,ya .r ) + S_MUL (scratch [8 ].i ,yb .r );
288
+ scratch [5 ].r = ADD32_ovflw ( scratch [0 ].r , ADD32_ovflw ( S_MUL (scratch [7 ].r ,ya .r ), S_MUL (scratch [8 ].r ,yb .r )) );
289
+ scratch [5 ].i = ADD32_ovflw ( scratch [0 ].i , ADD32_ovflw ( S_MUL (scratch [7 ].i ,ya .r ), S_MUL (scratch [8 ].i ,yb .r )) );
290
290
291
- scratch [6 ].r = S_MUL (scratch [10 ].i ,ya .i ) + S_MUL (scratch [9 ].i ,yb .i );
292
- scratch [6 ].i = - S_MUL (scratch [10 ].r ,ya .i ) - S_MUL (scratch [9 ].r ,yb .i );
291
+ scratch [6 ].r = ADD32_ovflw ( S_MUL (scratch [10 ].i ,ya .i ), S_MUL (scratch [9 ].i ,yb .i ) );
292
+ scratch [6 ].i = NEG32_ovflw ( ADD32_ovflw ( S_MUL (scratch [10 ].r ,ya .i ), S_MUL (scratch [9 ].r ,yb .i )) );
293
293
294
294
C_SUB (* Fout1 ,scratch [5 ],scratch [6 ]);
295
295
C_ADD (* Fout4 ,scratch [5 ],scratch [6 ]);
296
296
297
- scratch [11 ].r = scratch [0 ].r + S_MUL (scratch [7 ].r ,yb .r ) + S_MUL (scratch [8 ].r ,ya .r );
298
- scratch [11 ].i = scratch [0 ].i + S_MUL (scratch [7 ].i ,yb .r ) + S_MUL (scratch [8 ].i ,ya .r );
299
- scratch [12 ].r = - S_MUL (scratch [10 ].i ,yb .i ) + S_MUL (scratch [9 ].i ,ya . i );
300
- scratch [12 ].i = S_MUL (scratch [10 ].r ,yb .i ) - S_MUL (scratch [9 ].r ,ya .i );
297
+ scratch [11 ].r = ADD32_ovflw ( scratch [0 ].r , ADD32_ovflw ( S_MUL (scratch [7 ].r ,yb .r ), S_MUL (scratch [8 ].r ,ya .r )) );
298
+ scratch [11 ].i = ADD32_ovflw ( scratch [0 ].i , ADD32_ovflw ( S_MUL (scratch [7 ].i ,yb .r ), S_MUL (scratch [8 ].i ,ya .r )) );
299
+ scratch [12 ].r = SUB32_ovflw ( S_MUL (scratch [9 ].i ,ya .i ), S_MUL (scratch [10 ].i ,yb . i ) );
300
+ scratch [12 ].i = SUB32_ovflw ( S_MUL (scratch [10 ].r ,yb .i ), S_MUL (scratch [9 ].r ,ya .i ) );
301
301
302
302
C_ADD (* Fout2 ,scratch [11 ],scratch [12 ]);
303
303
C_SUB (* Fout3 ,scratch [11 ],scratch [12 ]);
0 commit comments