Skip to content

Commit 24e697e

Browse files
authored
Merge pull request #1970 from quickwritereader/develop
crot fix
2 parents 3e9fd63 + 43a4572 commit 24e697e

File tree

1 file changed

+54
-36
lines changed

1 file changed

+54
-36
lines changed

kernel/power/crot.c

Lines changed: 54 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
55 55
"lxvd2x 51, %[i48], %[y_ptr] \n\t"
56 56
"addi %[x_ptr], %[x_ptr], 64 \n\t"
57 57
"addi %[y_ptr], %[y_ptr], 64 \n\t"
58-
"addic. %[temp_n], %[temp_n], -16 \n\t"
58+
"addic. %[temp_n], %[temp_n], -8 \n\t"
59 59
"ble 2f \n\t"
60 60
".p2align 5 \n\t"
61 61
"1: \n\t"
@@ -103,7 +103,7 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
103 103
"stxvd2x %x[x3], %[i48], %[y_ptr] \n\t"
104 104
"addi %[x_ptr], %[x_ptr], 128 \n\t"
105 105
"addi %[y_ptr], %[y_ptr], 128 \n\t"
106-
"addic. %[temp_n], %[temp_n], -16 \n\t"
106+
"addic. %[temp_n], %[temp_n], -8 \n\t"
107 107
"bgt 1b \n\t"
108 108
"2: \n\t"
109 109
"xvmulsp 40, 32, 36 \n\t" // c * x
@@ -173,41 +173,59 @@ static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
173 173

174 174
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
175 175
{
176-
BLASLONG i=0;
177-
BLASLONG ix=0,iy=0;
178-
FLOAT *x1=x;
179-
FLOAT *y1=y;
180-
FLOAT temp;
181-
if ( n <= 0 ) return(0);
182-
if ( (inc_x == 1) && (inc_y == 1) )
183-
{
184-
BLASLONG n1 = n & -8;
185-
if ( n1 > 0 )
186-
{
187-
crot_kernel_8(n1, x1, y1, c, s);
188-
i=n1;
189-
}
190-
while(i < n)
191-
{
192-
temp = c*x[i] + s*y[i] ;
193-
y[i] = c*y[i] - s*x[i] ;
194-
x[i] = temp ;
195-
i++ ;
196-
}
176+
BLASLONG i=0;
177+
BLASLONG ix=0,iy=0;
178+
FLOAT temp[2];
179+
BLASLONG inc_x2;
180+
BLASLONG inc_y2;
197 181

198-
}
199-
else
200-
{
201-
while(i < n)
202-
{
203-
temp = c*x[ix] + s*y[iy] ;
204-
y[iy] = c*y[iy] - s*x[ix] ;
205-
x[ix] = temp ;
206-
ix += inc_x ;
207-
iy += inc_y ;
208-
i++ ;
209-
}
210-
}
182+
if ( n <= 0 ) return(0);
183+
184+
if ( (inc_x == 1) && (inc_y == 1) )
185+
{
186+
187+
BLASLONG n1 = n & -8;
188+
if ( n1 > 0 )
189+
{
190+
crot_kernel_8(n1, x, y, c, s);
191+
i=n1;
192+
ix=2*n1;
193+
}
194+
195+
while(i < n)
196+
{
197+
temp[0] = c*x[ix] + s*y[ix] ;
198+
temp[1] = c*x[ix+1] + s*y[ix+1] ;
199+
y[ix] = c*y[ix] - s*x[ix] ;
200+
y[ix+1] = c*y[ix+1] - s*x[ix+1] ;
201+
x[ix] = temp[0] ;
202+
x[ix+1] = temp[1] ;
203+
204+
ix += 2 ;
205+
i++ ;
206+
207+
}
208+
209+
}
210+
else
211+
{
212+
inc_x2 = 2 * inc_x ;
213+
inc_y2 = 2 * inc_y ;
214+
while(i < n)
215+
{
216+
temp[0] = c*x[ix] + s*y[iy] ;
217+
temp[1] = c*x[ix+1] + s*y[iy+1] ;
218+
y[iy] = c*y[iy] - s*x[ix] ;
219+
y[iy+1] = c*y[iy+1] - s*x[ix+1] ;
220+
x[ix] = temp[0] ;
221+
x[ix+1] = temp[1] ;
222+
223+
ix += inc_x2 ;
224+
iy += inc_y2 ;
225+
i++ ;
226+
227+
}
228+
}
211 229
return(0);
212 230
}
213 231

0 commit comments

Comments
 (0)