Skip to content

Commit 0144068

Browse files
committed
Rewrite `&= 0` as `= 0` when clearing synchronization flags, and simplify the initial blocking phase.
1 parent 62cf769 commit 0144068

File tree

1 file changed

+13
-14
lines changed

1 file changed

+13
-14
lines changed

driver/level3/level3_thread.c

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -344,12 +344,6 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
344344
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
345345
for (js = n_from, bufferside = 0; js < n_to; js += div_n, bufferside ++) {
346346

347-
/* Make sure if no one is using workspace */
348-
START_RPCC();
349-
for (i = 0; i < args -> nthreads; i++)
350-
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
351-
STOP_RPCC(waiting1);
352-
353347
#if defined(FUSED_GEMM) && !defined(TIMING)
354348

355349
/* Fused operation to copy region of B into workspace and apply kernel */
@@ -387,10 +381,15 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
387381
}
388382
#endif
389383

390-
/* Set flag so other threads can access local region of B */
391-
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++)
384+
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++) {
385+
/* Make sure if no one is using workspace */
386+
START_RPCC();
387+
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
388+
STOP_RPCC(waiting1);
389+
/* Set flag so other threads can access local region of B */
392390
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
393-
WMB;
391+
WMB;
392+
}
394393
}
395394

396395
/* Get regions of B from other threads and apply kernel */
@@ -426,13 +425,13 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
426425

427426
/* Clear synchronization flag if this thread is done with other region of B */
428427
if (m_to - m_from == min_i) {
429-
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
428+
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
430429
WMB;
431430
}
432431
}
433432
} while (current != mypos);
434433

435-
/* Iterate through steps of m
434+
/* Iterate through steps of m
436435
* Note: First step has already been finished */
437436
for(is = m_from + min_i; is < m_to; is += min_i){
438437
min_i = m_to - is;
@@ -462,14 +461,14 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
462461
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
463462
c, ldc, is, js);
464463
STOP_RPCC(kernel);
465-
464+
466465
#ifdef TIMING
467466
ops += 2 * min_i * MIN(range_n[current + 1] - js, div_n) * min_l;
468467
#endif
469-
468+
470469
/* Clear synchronization flag if this thread is done with region of B */
471470
if (is + min_i >= m_to) {
472-
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
471+
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
473472
WMB;
474473
}
475474
}

0 commit comments

Comments
 (0)