Skip to content

Commit 177b78c

Browse files
authored
Issue1388 (#1389)
* Calculation of chunk range limits was ignoring num_cpu (bug introduced by me in #1262) - should fix #1388
* Calculation of range limits was ignoring num_cpu (bug introduced by me in #1262)
* Calculation of chunk range limits was ignoring num_cpu (bug introduced by me in #1262)
* Calculation of chunk range limits was ignoring num_cpu (bug introduced by me in #1262)
* Calculation of chunk range limits was ignoring num_cpu (bug introduced by me in #1262)
* Calculation of chunk range limits was ignoring num_cpu (bug introduced by me in #1262)
1 parent 281a2b9 commit 177b78c

File tree

6 files changed

+14
-14
lines changed

6 files changed

+14
-14
lines changed

driver/level2/gbmv_thread.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -230,10 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
230230

231231
#ifndef TRANSA
232232
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
233-
if (range_m[num_cpu] > m) range_m[num_cpu] = m;
233+
if (range_m[num_cpu] > m * num_cpu) range_m[num_cpu] = m * num_cpu;
234234
#else
235235
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
236-
if (range_m[num_cpu] > n) range_m[num_cpu] = n;
236+
if (range_m[num_cpu] > n * num_cpu) range_m[num_cpu] = n * num_cpu;
237237
#endif
238238

239239
queue[num_cpu].mode = mode;

driver/level2/sbmv_thread.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
246246

247247
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
248248
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
249-
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
249+
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
250250

251251
queue[num_cpu].mode = mode;
252252
queue[num_cpu].routine = sbmv_kernel;
@@ -286,7 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
286286

287287
range_m[num_cpu + 1] = range_m[num_cpu] + width;
288288
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
289-
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
289+
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
290290

291291
queue[num_cpu].mode = mode;
292292
queue[num_cpu].routine = sbmv_kernel;
@@ -318,7 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
318318
range_m[num_cpu + 1] = range_m[num_cpu] + width;
319319

320320
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
321-
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
321+
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
322322

323323
queue[num_cpu].mode = mode;
324324
queue[num_cpu].routine = sbmv_kernel;

driver/level2/spmv_thread.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
246246

247247
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
248248
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
249-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
249+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
250250

251251
queue[num_cpu].mode = mode;
252252
queue[num_cpu].routine = spmv_kernel;
@@ -286,7 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
286286

287287
range_m[num_cpu + 1] = range_m[num_cpu] + width;
288288
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
289-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
289+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
290290

291291
queue[num_cpu].mode = mode;
292292
queue[num_cpu].routine = spmv_kernel;

driver/level2/symv_thread.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
177177

178178
range_m[num_cpu + 1] = range_m[num_cpu] + width;
179179
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
180-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
180+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
181181

182182
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
183183
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
@@ -226,7 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
226226

227227
range_m[num_cpu + 1] = range_m[num_cpu] + width;
228228
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
229-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
229+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
230230

231231
queue[num_cpu].mode = mode;
232232
queue[num_cpu].routine = symv_kernel;

driver/level2/tbmv_thread.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
288288

289289
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
290290
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
291-
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
291+
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
292292

293293
queue[num_cpu].mode = mode;
294294
queue[num_cpu].routine = trmv_kernel;
@@ -328,7 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
328328

329329
range_m[num_cpu + 1] = range_m[num_cpu] + width;
330330
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
331-
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
331+
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
332332

333333
queue[num_cpu].mode = mode;
334334
queue[num_cpu].routine = trmv_kernel;
@@ -358,7 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
358358

359359
range_m[num_cpu + 1] = range_m[num_cpu] + width;
360360
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
361-
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
361+
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
362362

363363
queue[num_cpu].mode = mode;
364364
queue[num_cpu].routine = trmv_kernel;

driver/level2/tpmv_thread.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
303303

304304
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
305305
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
306-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
306+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
307307

308308
queue[num_cpu].mode = mode;
309309
queue[num_cpu].routine = tpmv_kernel;
@@ -343,7 +343,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
343343

344344
range_m[num_cpu + 1] = range_m[num_cpu] + width;
345345
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
346-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
346+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
347347

348348
queue[num_cpu].mode = mode;
349349
queue[num_cpu].routine = tpmv_kernel;

0 commit comments

Comments
 (0)