@@ -55,7 +55,7 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
55
55
"lxvd2x 51, %[i48], %[y_ptr] \n\t"
56
56
"addi %[x_ptr], %[x_ptr], 64 \n\t"
57
57
"addi %[y_ptr], %[y_ptr], 64 \n\t"
58
- "addic. %[temp_n], %[temp_n], -16 \n\t"
58
+ "addic. %[temp_n], %[temp_n], -8 \n\t"
59
59
"ble 2f \n\t"
60
60
".p2align 5 \n\t"
61
61
"1: \n\t"
@@ -103,7 +103,7 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
103
103
"stxvd2x %x[x3], %[i48], %[y_ptr] \n\t"
104
104
"addi %[x_ptr], %[x_ptr], 128 \n\t"
105
105
"addi %[y_ptr], %[y_ptr], 128 \n\t"
106
- "addic. %[temp_n], %[temp_n], -16 \n\t"
106
+ "addic. %[temp_n], %[temp_n], -8 \n\t"
107
107
"bgt 1b \n\t"
108
108
"2: \n\t"
109
109
"xvmulsp 40, 32, 36 \n\t" // c * x
@@ -173,41 +173,59 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
173
173
174
174
/*
 * Apply the rotation defined by the scalars c and s to n elements of x and y.
 *
 * Each logical element occupies two consecutive FLOAT values (indices k and
 * k + 1; presumably the real and imaginary parts of a complex number --
 * confirm against the build configuration).  For each of those two values:
 *
 *     x_new = c * x + s * y
 *     y_new = c * y - s * x
 *
 * Parameters:
 *     n      number of logical elements; nothing is done when n <= 0
 *     x, y   vectors updated in place
 *     inc_x  stride of x, counted in logical elements
 *     inc_y  stride of y, counted in logical elements
 *     c, s   rotation coefficients
 *
 * Returns 0 in every case.
 */
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
{
    BLASLONG i = 0;
    BLASLONG ix = 0, iy = 0;
    FLOAT temp[2];

    if (n <= 0) {
        return (0);
    }

    if ((inc_x == 1) && (inc_y == 1)) {
        /* Largest multiple of 8 that does not exceed n. */
        BLASLONG n1 = n & -8;

        if (n1 > 0) {
            /*
             * Hand the first n1 elements to crot_kernel_8 (defined earlier in
             * this file; presumably it applies the same rotation).
             */
            crot_kernel_8(n1, x, y, c, s);
            i = n1;
            /* Two FLOATs per element, so the scalar index advances by 2 * n1. */
            ix = 2 * n1;
        }

        /* Scalar loop for the remaining n - n1 elements; x and y share ix. */
        while (i < n) {
            /* Keep the new x values aside: the y update still needs the old x. */
            temp[0] = c * x[ix] + s * y[ix];
            temp[1] = c * x[ix + 1] + s * y[ix + 1];
            y[ix] = c * y[ix] - s * x[ix];
            y[ix + 1] = c * y[ix + 1] - s * x[ix + 1];
            x[ix] = temp[0];
            x[ix + 1] = temp[1];

            ix += 2;
            i++;
        }
    } else {
        /* Strides are given in elements; convert them to FLOAT offsets. */
        BLASLONG inc_x2 = 2 * inc_x;
        BLASLONG inc_y2 = 2 * inc_y;

        while (i < n) {
            /* Keep the new x values aside: the y update still needs the old x. */
            temp[0] = c * x[ix] + s * y[iy];
            temp[1] = c * x[ix + 1] + s * y[iy + 1];
            y[iy] = c * y[iy] - s * x[ix];
            y[iy + 1] = c * y[iy + 1] - s * x[ix + 1];
            x[ix] = temp[0];
            x[ix + 1] = temp[1];

            ix += inc_x2;
            iy += inc_y2;
            i++;
        }
    }

    return (0);
}
213
231
0 commit comments