Skip to content

Commit c628c6f

Browse files
authored
Merge pull request #1612 from oon3m0oo/cpus
Fixed a few more unnecessary calls to num_cpu_avail.
2 parents 67d81ab + c2545b0 commit c628c6f

18 files changed

+59
-92
lines changed

interface/axpy.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@
4040
#include "common.h"
4141
#ifdef FUNCTION_PROFILE
4242
#include "functable.h"
43-
#endif
43+
#endif
4444
#if defined(Z13)
4545
#define MULTI_THREAD_MINIMAL 200000
4646
#else
47-
#define MULTI_THREAD_MINIMAL 10000
47+
#define MULTI_THREAD_MINIMAL 10000
4848
#endif
4949
#ifndef CBLAS
5050

@@ -83,17 +83,15 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
8383
if (incy < 0) y -= (n - 1) * incy;
8484

8585
#ifdef SMP
86-
nthreads = num_cpu_avail(1);
87-
8886
//disable multi-thread when incx==0 or incy==0
8987
//In that case, the threads would be dependent.
90-
if (incx == 0 || incy == 0)
91-
nthreads = 1;
92-
88+
//
9389
//Temporarily work-around the low performance issue with small input size &
9490
//multithreads.
95-
if (n <= MULTI_THREAD_MINIMAL)
91+
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
9692
nthreads = 1;
93+
else
94+
nthreads = num_cpu_avail(1);
9795

9896
if (nthreads == 1) {
9997
#endif

interface/scal.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
7676

7777

7878
#ifdef SMP
79-
nthreads = num_cpu_avail(1);
80-
8179
if (n <= 1048576 )
8280
nthreads = 1;
81+
else
82+
nthreads = num_cpu_avail(1);
83+
8384

8485
if (nthreads == 1) {
8586
#endif

interface/zaxpy.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,16 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
9090
if (incy < 0) y -= (n - 1) * incy * 2;
9191

9292
#ifdef SMP
93-
nthreads = num_cpu_avail(1);
94-
9593
//disable multi-thread when incx==0 or incy==0
9694
//In that case, the threads would be dependent.
97-
if (incx == 0 || incy == 0)
98-
nthreads = 1;
99-
100-
//Work around the low performance issue with small input size &
95+
//
96+
//Temporarily work-around the low performance issue with small input size &
10197
//multithreads.
102-
if (n <= MULTI_THREAD_MINIMAL) {
98+
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
10399
nthreads = 1;
104-
}
100+
else
101+
nthreads = num_cpu_avail(1);
102+
105103
if (nthreads == 1) {
106104
#endif
107105

interface/zscal.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
9090
FUNCTION_PROFILE_START();
9191

9292
#ifdef SMP
93-
nthreads = num_cpu_avail(1);
94-
9593
if ( n <= 1048576 )
9694
nthreads = 1;
95+
else
96+
nthreads = num_cpu_avail(1);
9797

9898
if (nthreads == 1) {
9999
#endif

interface/zswap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,12 @@ FLOAT *y = (FLOAT*)vy;
7979
if (incy < 0) y -= (n - 1) * incy * 2;
8080

8181
#ifdef SMP
82-
nthreads = num_cpu_avail(1);
83-
8482
//disable multi-thread when incx==0 or incy==0
8583
//In that case, the threads would be dependent.
8684
if (incx == 0 || incy == 0)
8785
nthreads = 1;
86+
else
87+
nthreads = num_cpu_avail(1);
8888

8989
if (nthreads == 1) {
9090
#endif

kernel/arm64/casum_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -233,13 +233,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
233233
FLOAT asum = 0.0;
234234

235235
#if defined(SMP)
236-
nthreads = num_cpu_avail(1);
237-
238-
if (inc_x == 0)
239-
nthreads = 1;
240-
241-
if (n <= 10000)
236+
if (inc_x == 0 || n <= 10000)
242237
nthreads = 1;
238+
else
239+
nthreads = num_cpu_avail(1);
243240

244241
if (nthreads == 1) {
245242
asum = casum_compute(n, x, inc_x);

kernel/arm64/copy_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,10 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
183183
if (n <= 0) return 0;
184184

185185
#if defined(SMP)
186-
nthreads = num_cpu_avail(1);
187-
188-
if (inc_x == 0)
189-
nthreads = 1;
190-
191-
if (n <= 10000)
186+
if (inc_x == 0 || n <= 10000)
192187
nthreads = 1;
188+
else
189+
nthreads = num_cpu_avail(1);
193190

194191
if (nthreads == 1) {
195192
do_copy(n, x, inc_x, y, inc_y);

kernel/arm64/dasum_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,13 +228,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
228228
FLOAT asum = 0.0;
229229

230230
#if defined(SMP)
231-
nthreads = num_cpu_avail(1);
232-
233-
if (inc_x == 0)
234-
nthreads = 1;
235-
236-
if (n <= 10000)
231+
if (inc_x == 0 || n <= 10000)
237232
nthreads = 1;
233+
else
234+
nthreads = num_cpu_avail(1);
238235

239236
if (nthreads == 1) {
240237
asum = dasum_compute(n, x, inc_x);

kernel/arm64/dot_thunderx2t99.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
199199
" faddp "DOTF", v0.2d \n"
200200
#endif /* !defined(DSDOT) */
201201

202-
#else /* !defined(DOUBLE) */
202+
#else /* !defined(DOUBLE) */
203203
#define KERNEL_F1 \
204204
" ldr "TMPX", ["X"] \n" \
205205
" ldr "TMPY", ["Y"] \n" \
@@ -384,13 +384,10 @@ RETURN_TYPE CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y
384384
RETURN_TYPE dot = 0.0;
385385

386386
#if defined(SMP)
387-
nthreads = num_cpu_avail(1);
388-
389-
if (inc_x == 0 || inc_y == 0)
390-
nthreads = 1;
391-
392-
if (n <= 10000)
387+
if (inc_x == 0 || inc_y == 0 || n <= 10000)
393388
nthreads = 1;
389+
else
390+
nthreads = num_cpu_avail(1);
394391

395392
if (nthreads == 1) {
396393
dot = dot_compute(n, x, inc_x, y, inc_y);

kernel/arm64/dznrm2_thunderx2t99.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,10 +328,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
328328
if (n <= 0 || inc_x <= 0) return 0.0;
329329

330330
#if defined(SMP)
331-
nthreads = num_cpu_avail(1);
332-
333331
if (n <= 10000)
334332
nthreads = 1;
333+
else
334+
nthreads = num_cpu_avail(1);
335335

336336
if (nthreads == 1) {
337337
nrm2_compute(n, x, inc_x, &ssq, &scale);

kernel/arm64/dznrm2_thunderx2t99_fast.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
235235
if (n <= 0 || inc_x <= 0) return 0.0;
236236

237237
#if defined(SMP)
238-
nthreads = num_cpu_avail(1);
239-
240238
if (n <= 10000)
241239
nthreads = 1;
240+
else
241+
nthreads = num_cpu_avail(1);
242242

243243
if (nthreads == 1) {
244244
nrm2 = nrm2_compute(n, x, inc_x);

kernel/arm64/iamax_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,13 +321,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
321321
BLASLONG max_index = 0;
322322

323323
#if defined(SMP)
324-
nthreads = num_cpu_avail(1);
325-
326-
if (inc_x == 0)
327-
nthreads = 1;
328-
329-
if (n <= 10000)
324+
if (inc_x == 0 || n <= 10000)
330325
nthreads = 1;
326+
else
327+
nthreads = num_cpu_avail(1);
331328

332329
if (nthreads == 1) {
333330
max_index = iamax_compute(n, x, inc_x);

kernel/arm64/izamax_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -330,13 +330,10 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
330330
BLASLONG max_index = 0;
331331

332332
#if defined(SMP)
333-
nthreads = num_cpu_avail(1);
334-
335-
if (inc_x == 0)
336-
nthreads = 1;
337-
338-
if (n <= 10000)
333+
if (inc_x == 0 || n <= 10000)
339334
nthreads = 1;
335+
else
336+
nthreads = num_cpu_avail(1);
340337

341338
if (nthreads == 1) {
342339
max_index = izamax_compute(n, x, inc_x);

kernel/arm64/sasum_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
230230
FLOAT asum = 0.0;
231231

232232
#if defined(SMP)
233-
nthreads = num_cpu_avail(1);
234-
235-
if (inc_x == 0)
236-
nthreads = 1;
237-
238-
if (n <= 10000)
233+
if (inc_x == 0 || n <= 10000)
239234
nthreads = 1;
235+
else
236+
nthreads = num_cpu_avail(1);
240237

241238
if (nthreads == 1) {
242239
asum = sasum_compute(n, x, inc_x);

kernel/arm64/scnrm2_thunderx2t99.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
318318
if (n <= 0 || inc_x <= 0) return 0.0;
319319

320320
#if defined(SMP)
321-
nthreads = num_cpu_avail(1);
322-
323321
if (n <= 10000)
324322
nthreads = 1;
323+
else
324+
nthreads = num_cpu_avail(1);
325325

326326
if (nthreads == 1) {
327327
nrm2_double = nrm2_compute(n, x, inc_x);

kernel/arm64/zasum_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
230230
FLOAT asum = 0.0;
231231

232232
#if defined(SMP)
233-
nthreads = num_cpu_avail(1);
234-
235-
if (inc_x == 0)
236-
nthreads = 1;
237-
238-
if (n <= 10000)
233+
if (inc_x == 0 || n <= 10000)
239234
nthreads = 1;
235+
else
236+
nthreads = num_cpu_avail(1);
240237

241238
if (nthreads == 1) {
242239
asum = zasum_compute(n, x, inc_x);

kernel/arm64/zdot_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -317,13 +317,10 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
317317
CIMAG(zdot) = 0.0;
318318

319319
#if defined(SMP)
320-
nthreads = num_cpu_avail(1);
321-
322-
if (inc_x == 0 || inc_y == 0)
323-
nthreads = 1;
324-
325-
if (n <= 10000)
320+
if (inc_x == 0 || inc_y == 0 || n <= 10000)
326321
nthreads = 1;
322+
else
323+
nthreads = num_cpu_avail(1);
327324

328325
if (nthreads == 1) {
329326
zdot_compute(n, x, inc_x, y, inc_y, &zdot);

kernel/x86_64/ddot.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
#include "common.h"
3030

3131

32-
#if defined(BULLDOZER)
32+
#if defined(BULLDOZER)
3333
#include "ddot_microk_bulldozer-2.c"
3434
#elif defined(STEAMROLLER) || defined(EXCAVATOR)
3535
#include "ddot_microk_steamroller-2.c"
3636
#elif defined(PILEDRIVER)
3737
#include "ddot_microk_piledriver-2.c"
38-
#elif defined(NEHALEM)
38+
#elif defined(NEHALEM)
3939
#include "ddot_microk_nehalem-2.c"
4040
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
4141
#include "ddot_microk_haswell-2.c"
@@ -110,7 +110,7 @@ static FLOAT dot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON
110110
FLOAT temp1 = 0.0;
111111
FLOAT temp2 = 0.0;
112112

113-
BLASLONG n1 = n & -4;
113+
BLASLONG n1 = n & -4;
114114

115115
while(i < n1)
116116
{
@@ -169,13 +169,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
169169
FLOAT dot = 0.0;
170170

171171
#if defined(SMP)
172-
nthreads = num_cpu_avail(1);
173-
174-
if (inc_x == 0 || inc_y == 0)
175-
nthreads = 1;
176-
177-
if (n <= 10000)
172+
if (inc_x == 0 || inc_y == 0 || n <= 10000)
178173
nthreads = 1;
174+
else
175+
nthreads = num_cpu_avail(1);
179176

180177
if (nthreads == 1) {
181178
dot = dot_compute(n, x, inc_x, y, inc_y);

0 commit comments

Comments
 (0)