From 78d9910236739e98a16244679bbd814f1d79ca7f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 19 Feb 2019 20:59:48 +0100 Subject: [PATCH 1/3] Correct range_n limiting same bug as seen in #1388, somehow missed in corresponding PR #1389 --- driver/level2/trmv_thread.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c index 24b881a93b..00092e9569 100644 --- a/driver/level2/trmv_thread.c +++ b/driver/level2/trmv_thread.c @@ -346,8 +346,8 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; - + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; + } queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; queue[num_cpu].args = &args; @@ -386,8 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - if (range_n[num_cpu] > m) range_n[num_cpu] = m; - + if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; queue[num_cpu].args = &args; From e29b0cfcc439b1598ba26486763b3cfa46583a9e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 19 Feb 2019 21:03:30 +0100 Subject: [PATCH 2/3] Allow multithreading TRMV again revert workaround introduced for issue #1332 as the actual cause appears to be my incorrect fix from #1262 (see #1388) --- interface/trmv.c | 5 +---- interface/ztrmv.c | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/interface/trmv.c b/interface/trmv.c index 7c40ae976f..2e52527a3c 100644 --- a/interface/trmv.c +++ b/interface/trmv.c @@ -218,11 +218,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, buffer = (FLOAT *)blas_memory_alloc(1); #ifdef SMP -/* nthreads = num_cpu_avail(2); + nthreads = num_cpu_avail(2); -FIXME trmv_thread was found to be broken, see issue 1332 */ - nthreads = 1; - if (nthreads == 1) { #endif diff --git a/interface/ztrmv.c b/interface/ztrmv.c index 0e16632e06..4c47e9e913 100644 --- a/interface/ztrmv.c +++ b/interface/ztrmv.c @@ -239,9 +239,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, } else nthreads = 1; -/* FIXME TRMV multithreading appears to be broken, see issue 1332*/ - nthreads = 1; - if(nthreads > 1) { buffer_size = n > 16 ? 0 : n * 4 + 40; } From 45333d57931ddc64fb3e8a091e0616dd9528cef1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 19 Feb 2019 22:16:33 +0100 Subject: [PATCH 3/3] Fix error introduced during cleanup --- driver/level2/trmv_thread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c index 00092e9569..43eeb40d25 100644 --- a/driver/level2/trmv_thread.c +++ b/driver/level2/trmv_thread.c @@ -347,7 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; - } + queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; queue[num_cpu].args = &args; @@ -387,6 +387,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu; + queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; queue[num_cpu].args = &args;