Skip to content

Commit 6caa403

Browse files
committed
Merge pull request #744 from jeromerobert/bug731
Bug731
2 parents 692d9c8 + 14db1ca commit 6caa403

File tree

4 files changed

+19
-6
lines changed

4 files changed

+19
-6
lines changed

CONTRIBUTORS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ In chronological order:
121121
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
122122
ARMv8 support.
123123

124+
* Jerome Robert <[email protected]>
125+
* [2015-01-01] Speed up small `ger` and `gemv` using stack allocation (bug #478)
126+
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
127+
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
128+
124129
* Dan Kortschak
125130
* [2015-01-07] Added test for drotmg bug #484.
126131

interface/ger.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,11 @@ void CNAME(enum CBLAS_ORDER order,
174174
STACK_ALLOC(m, FLOAT, buffer);
175175

176176
#ifdef SMPTEST
177-
nthreads = num_cpu_avail(2);
178-
177+
// Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
178+
if(1L * m * n > 24L * GEMM_MULTITHREAD_THRESHOLD)
179+
nthreads = num_cpu_avail(2);
180+
else
181+
nthreads = 1;
179182

180183
if (nthreads == 1) {
181184
#endif

interface/swap.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
7777
if (incy < 0) y -= (n - 1) * incy;
7878

7979
#ifdef SMP
80-
nthreads = num_cpu_avail(1);
8180

8281
// Disable multi-threading when incx==0 or incy==0, or when n is below the size threshold.
8382
// With a zero increment the threads would be dependent; for small n, threading does not pay off.
84-
if (incx == 0 || incy == 0)
85-
nthreads = 1;
83+
if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT))
84+
nthreads = 1;
85+
else
86+
nthreads = num_cpu_avail(1);
8687

8788
if (nthreads == 1) {
8889
#endif

interface/zger.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,11 @@ void CNAME(enum CBLAS_ORDER order,
213213
buffer = (FLOAT *)blas_memory_alloc(1);
214214

215215
#ifdef SMPTEST
216-
nthreads = num_cpu_avail(2);
216+
// Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
217+
if(1L * m * n > 3L * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD)
218+
nthreads = num_cpu_avail(2);
219+
else
220+
nthreads = 1;
217221

218222
if (nthreads == 1) {
219223
#endif

0 commit comments

Comments
 (0)